diff options
| author | serpilliere <devnull@localhost> | 2014-06-03 10:27:56 +0200 |
|---|---|---|
| committer | serpilliere <devnull@localhost> | 2014-06-03 10:27:56 +0200 |
| commit | ed5c3668cc9f545b52674ad699fc2b0ed1ccb575 (patch) | |
| tree | 07faf97d7e4d083173a1f7e1bfd249baed2d74f9 /miasm2/analysis | |
| parent | a183e1ebd525453710306695daa8c410fd0cb2af (diff) | |
| download | miasm-ed5c3668cc9f545b52674ad699fc2b0ed1ccb575.tar.gz miasm-ed5c3668cc9f545b52674ad699fc2b0ed1ccb575.zip | |
Miasm v2
* API has changed, so old scripts need updates
* See example for API usage
* Use tcc or llvm for jit emulation
* Go to test and run test_all.py to check install

Enjoy !
Diffstat (limited to '')
| -rw-r--r-- | miasm2/analysis/__init__.py | 0 | ||||
| -rw-r--r-- | miasm2/analysis/data_analysis.py | 326 | ||||
| -rw-r--r-- | miasm2/analysis/debugging.py | 479 | ||||
| -rw-r--r-- | miasm2/analysis/disasm_cb.py | 137 | ||||
| -rw-r--r-- | miasm2/analysis/gdbserver.py | 425 |
5 files changed, 1367 insertions, 0 deletions
diff --git a/miasm2/analysis/__init__.py b/miasm2/analysis/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/analysis/__init__.py diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py new file mode 100644 index 00000000..cb953399 --- /dev/null +++ b/miasm2/analysis/data_analysis.py @@ -0,0 +1,326 @@ +from miasm2.expression.expression import * +from miasm2.ir.symbexec import symbexec + + +def get_node_name(label, i, n): + # n_name = "%s_%d_%s"%(label.name, i, n) + n_name = (label, i, n) + return n_name + + +def intra_bloc_flow_raw(my_ir, flow_graph, irb): + """ + Create data flow for an irbloc using raw IR expressions + """ + in_nodes = {} + out_nodes = {} + current_nodes = {} + for i, exprs in enumerate(irb.irs): + list_rw = get_list_rw(exprs) + current_nodes.update(out_nodes) + + # gen mem arg to mem node links + all_mems = set() + for nodes_r, nodes_w in list_rw: + for n in nodes_r.union(nodes_w): + all_mems.update(get_expr_mem(n)) + if not all_mems: + continue + + # print [str(x) for x in all_mems] + for n in all_mems: + node_n_w = get_node_name(irb.label, i, n) + if not n in nodes_r: + continue + o_r = n.arg.get_r(mem_read=False, cst_read=True) + for n_r in o_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irb.label, i, n_r) + current_nodes[n_r] = node_n_r + in_nodes[n_r] = node_n_r + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + # gen data flow links + for nodes_r, nodes_w in list_rw: + for n_r in nodes_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irb.label, i, n_r) + current_nodes[n_r] = node_n_r + in_nodes[n_r] = node_n_r + + flow_graph.add_node(node_n_r) + for n_w in nodes_w: + node_n_w = get_node_name(irb.label, i + 1, n_w) + out_nodes[n_w] = node_n_w + # current_nodes[n_w] = node_n_w + + flow_graph.add_node(node_n_w) + flow_graph.add_uniq_edge(node_n_r, node_n_w) + irb.in_nodes = in_nodes + 
irb.out_nodes = out_nodes + + +def intra_bloc_flow_symbexec(my_ir, flow_graph, irb): + """ + Create data flow for an irbloc using symbolic execution + """ + in_nodes = {} + out_nodes = {} + current_nodes = {} + + symbols_init = {} + for r in my_ir.arch.regs.all_regs_ids: + # symbols_init[r] = my_ir.arch.regs.all_regs_ids_init[i] + x = ExprId(r.name, r.size) + x.is_term = True + symbols_init[r] = x + + sb = symbexec(my_ir.arch, dict(symbols_init)) + sb.emulbloc(irb) + # print "*"*40 + # print irb + # print sb.dump_id() + # print sb.dump_mem() + + for n_w in sb.symbols: + # print n_w + v = sb.symbols[n_w] + if n_w in symbols_init and symbols_init[n_w] == v: + continue + read_values = v.get_r(cst_read=True) + # print n_w, v, [str(x) for x in read_values] + node_n_w = get_node_name(irb.label, len(irb.lines), n_w) + + for n_r in read_values: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irb.label, 0, n_r) + current_nodes[n_r] = node_n_r + in_nodes[n_r] = node_n_r + + out_nodes[n_w] = node_n_w + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + irb.in_nodes = in_nodes + irb.out_nodes = out_nodes + + +def inter_bloc_flow_link(my_ir, flow_graph, todo, link_exec_to_data): + lbl, current_nodes, exec_nodes = todo + # print 'TODO' + # print lbl + # print [(str(x[0]), str(x[1])) for x in current_nodes] + current_nodes = dict(current_nodes) + + # link current nodes to bloc in_nodes + if not lbl in my_ir.blocs: + print "cannot find bloc!!", lbl + return set() + irb = my_ir.blocs[lbl] + # pp(('IN', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) + to_del = set() + for n_r, node_n_r in irb.in_nodes.items(): + if not n_r in current_nodes: + continue + # print 'add link', current_nodes[n_r], node_n_r + flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r) + to_del.add(n_r) + + # if link exec to data, all nodes depends on exec nodes + if link_exec_to_data: + for n_x_r in exec_nodes: + for n_r, node_n_r in 
irb.in_nodes.items(): + if not n_x_r in current_nodes: + continue + if isinstance(n_r, ExprInt): + continue + flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r) + + # update current nodes using bloc out_nodes + for n_w, node_n_w in irb.out_nodes.items(): + current_nodes[n_w] = node_n_w + + # get nodes involved in exec flow + x_nodes = tuple(sorted(list(irb.dst.get_r()))) + + todo = set() + for lbl_dst in my_ir.g.successors(irb.label): + todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) + + # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) + + return todo + + +def create_implicit_flow(my_ir, flow_graph): + + # first fix IN/OUT + # If a son read a node which in not in OUT, add it + todo = set(my_ir.blocs.keys()) + while todo: + lbl = todo.pop() + irb = my_ir.blocs[lbl] + for lbl_son in my_ir.g.successors(irb.label): + if not lbl_son in my_ir.blocs: + print "cannot find bloc!!", lbl + continue + irb_son = my_ir.blocs[lbl_son] + for n_r in irb_son.in_nodes: + if n_r in irb.out_nodes: + continue + if not isinstance(n_r, ExprId): + continue + + # print "###", n_r + # print "###", irb + # print "###", 'OUT', [str(x) for x in irb.out_nodes] + # print "###", irb_son + # print "###", 'IN', [str(x) for x in irb_son.in_nodes] + + node_n_w = irb.label, len(irb.lines), n_r + irb.out_nodes[n_r] = node_n_w + if not n_r in irb.in_nodes: + irb.in_nodes[n_r] = irb.label, 0, n_r + node_n_r = irb.in_nodes[n_r] + # print "###", node_n_r + for lbl_p in my_ir.g.predecessors(irb.label): + todo.add(lbl_p) + + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + +def inter_bloc_flow(my_ir, flow_graph, irb_0, link_exec_to_data=True): + + todo = set() + done = set() + todo.add((irb_0, (), ())) + + while todo: + state = todo.pop() + if state in done: + continue + done.add(state) + out = inter_bloc_flow_link(my_ir, flow_graph, state, link_exec_to_data) + todo.update(out) + + +class symb_exec_func: + + """ + This algorithm will do symbolic execution on a 
function, trying to propagate + states between basic blocs in order to extract inter-blocs dataflow. The + algorithm tries to merge states from blocs with multiple parents. + + There is no real magic here, loops and complex merging will certainly fail. + """ + + def __init__(self, my_ir): + self.todo = set() + self.stateby_ad = {} + self.cpt = {} + self.states_var_done = set() + self.states_done = set() + self.total_done = 0 + self.my_ir = my_ir + + def add_state(self, parent, ad, state): + variables = dict(state.symbols.items()) + + # get bloc dead, and remove from state + b = self.my_ir.get_bloc(ad) + if b is None: + raise ValueError("unknown bloc! %s" % ad) + """ + dead = b.dead[0] + for d in dead: + if d in variables: + del(variables[d]) + """ + variables = variables.items() + + s = parent, ad, tuple(sorted(variables)) + """ + state_var = s[1] + if s in self.states_var_done: + print 'skip state' + return + if not ad in self.stateby_ad: + self.stateby_ad[ad] = set() + self.stateby_ad[ad].add(state_var) + + """ + self.todo.add(s) + + """ + if not ad in self.cpt: + self.cpt[ad] = 0 + """ + """ + def get_next_min(self): + state_by_ad = {} + for state in self.todo: + ad = state[1] + if not ad in state_by_ad: + state_by_ad[ad] = [] + state_by_ad[ad].append(state) + print "XX", [len(x) for x in state_by_ad.values()] + state_by_ad = state_by_ad.items() + state_by_ad.sort(key=lambda x:len(x[1])) + state_by_ad.reverse() + return state_by_ad.pop()[1][0] + """ + + def get_next_state(self): + state = self.todo.pop() + return state + + def do_step(self): + if len(self.todo) == 0: + return None + if self.total_done > 600: + print "symbexec watchdog!" 
+ return None + self.total_done += 1 + print 'CPT', self.total_done + while self.todo: + # if self.total_done>20: + # self.get_next_min() + # state = self.todo.pop() + state = self.get_next_state() + parent, ad, s = state + self.states_done.add(state) + self.states_var_done.add(state) + # if s in self.states_var_done: + # print "state done" + # continue + + sb = symbexec(self.my_ir.arch, dict(s)) + """ + if (not is_dispatcher(ad)) and len(self.stateby_ad[ad]) > 10: + print "DROP", ad + continue + + if (not is_dispatcher(ad)) and len(self.stateby_ad[ad]) > 5: + print ad + big_keys = diff_states(*self.stateby_ad[ad]) + print big_keys + print "MERGE", ad + + if not big_keys: + return parent, sb + #assert(len(big_keys) == 1) + s_out = [] + for k, v in s: + if k not in big_keys : + s_out.append((k, v)) + sb = symbexec(mn, dict(s_out)) + return parent, ad, sb + #diff_states(*self.stateby_ad[ad]) + """ + return parent, ad, sb + return None diff --git a/miasm2/analysis/debugging.py b/miasm2/analysis/debugging.py new file mode 100644 index 00000000..473f20f1 --- /dev/null +++ b/miasm2/analysis/debugging.py @@ -0,0 +1,479 @@ +import cmd +from miasm2.core.utils import hexdump +import miasm2.jitter.csts as csts +from miasm2.jitter.jitload import ExceptionHandle + + +class DebugBreakpoint: + + "Debug Breakpoint parent class" + pass + + +class DebugBreakpointSoft(DebugBreakpoint): + + "Stand for software breakpoint" + + def __init__(self, addr): + self.addr = addr + + def __str__(self): + return "Soft BP @0x%08x" % self.addr + + +class DebugBreakpointMemory(DebugBreakpoint): + + "Stand for memory breakpoint" + + type2str = {csts.BREAKPOINT_READ: "R", + csts.BREAKPOINT_WRITE: "W"} + + def __init__(self, addr, size, access_type): + self.addr = addr + self.access_type = access_type + self.size = size + + def __str__(self): + bp_type = "" + for k, v in self.type2str.items(): + if k & self.access_type != 0: + bp_type += v + return "Memory BP @0x%08x, Size 0x%08x, Type %s" % 
(self.addr, + self.size, + bp_type) + + @classmethod + def get_access_type(cls, read=False, write=False): + value = 0 + for k, v in cls.type2str.items(): + if v == "R" and read is True: + value += k + if v == "W" and write is True: + value += k + return value + + +class Debugguer(object): + + "Debugguer linked with a Jitter instance" + + def __init__(self, myjit): + "myjit : jitter instance" + self.myjit = myjit + self.bp_list = [] # DebugBreakpointSoft list + self.hw_bp_list = [] # DebugBreakpointHard list + self.mem_watched = [] # Memory areas watched + + def init_run(self, addr): + self.myjit.init_run(addr) + + def add_breakpoint(self, addr): + "Add bp @addr" + bp = DebugBreakpointSoft(addr) + func = lambda x: bp + bp.func = func + self.bp_list.append(bp) + self.myjit.add_breakpoint(addr, func) + + def init_memory_breakpoint(self): + "Set exception handler on EXCEPT_BREAKPOINT_INTERN" + self.myjit.exception_handler + + def add_memory_breakpoint(self, addr, size, read=False, write=False): + "add mem bp @[addr, addr + size], on read/write/both" + access_type = DebugBreakpointMemory.get_access_type(read=read, + write=write) + dbm = DebugBreakpointMemory(addr, size, access_type) + self.hw_bp_list.append(dbm) + self.myjit.vm.vm_add_memory_breakpoint(addr, size, access_type) + + def remove_breakpoint(self, dbs): + "remove the DebugBreakpointSoft instance" + self.bp_list.remove(dbs) + self.myjit.remove_breakpoints_by_callback(dbs.func) + + def remove_breakpoint_by_addr(self, addr): + "remove breakpoints @ addr" + for bp in self.get_breakpoint_by_addr(addr): + self.remove_breakpoint(bp) + + def remove_memory_breakpoint(self, dbm): + "remove the DebugBreakpointMemory instance" + self.hw_bp_list.remove(dbm) + self.myjit.vm.vm_remove_memory_breakpoint(dbm.addr, dbm.access_type) + + def remove_memory_breakpoint_by_addr_access(self, addr, read=False, + write=False): + "remove breakpoints @ addr" + access_type = DebugBreakpointMemory.get_access_type(read=read, + write=write) 
+ for bp in self.hw_bp_list: + if bp.addr == addr and bp.access_type == access_type: + self.remove_memory_breakpoint(bp) + + def get_breakpoint_by_addr(self, addr): + ret = [] + for dbgsoft in self.bp_list: + if dbgsoft.addr == addr: + ret.append(dbgsoft) + return ret + + def get_breakpoints(self): + return self.bp_list + + def active_trace(self, mn=None, regs=None, newbloc=None): + if mn is not None: + self.myjit.jit.log_mn = mn + if regs is not None: + self.myjit.jit.log_regs = regs + if newbloc is not None: + self.myjit.jit.log_newbloc = newbloc + + def handle_exception(self, res): + if res is None: + return + + if isinstance(res, DebugBreakpointSoft): + print "Breakpoint reached @0x%08x" % res.addr + elif isinstance(res, ExceptionHandle): + if res == ExceptionHandle.memoryBreakpoint(): + print "Memory breakpoint reached!" + + # Remove flag + except_flag = self.myjit.vm.vm_get_exception() + self.myjit.vm.vm_set_exception(except_flag ^ res.except_flag) + + else: + raise NotImplementedError("Unknown Except") + else: + raise NotImplementedError("type res") + + def step(self): + "Step in jit" + + self.myjit.jit.set_options(jit_maxline=1) + self.myjit.jit.updt_automod_code(self.myjit.vm, self.myjit.pc, 8) + + res = self.myjit.continue_run(step=True) + self.handle_exception(res) + + self.myjit.jit.set_options(jit_maxline=50) + self.on_step() + + return res + + def run(self): + res = self.myjit.continue_run() + self.handle_exception(res) + return res + + def get_mem(self, addr, size=0xF): + "hexdump @addr, size" + + hexdump(self.myjit.vm.vm_get_mem(addr, size)) + + def get_mem_raw(self, addr, size=0xF): + "hexdump @addr, size" + return self.myjit.vm.vm_get_mem(addr, size) + + def watch_mem(self, addr, size=0xF): + self.mem_watched.append((addr, size)) + + def on_step(self): + for addr, size in self.mem_watched: + print "@0x%08x:" % addr + self.get_mem(addr, size) + + def get_reg_value(self, reg_name): + return getattr(self.myjit.cpu, reg_name) + + def 
set_reg_value(self, reg_name, value): + + # Handle PC case + if reg_name == self.myjit.my_ir.pc.name: + self.init_run(value) + + setattr(self.myjit.cpu, reg_name, value) + + def get_gpreg_all(self): + "Return general purposes registers" + return self.myjit.cpu.vm_get_gpreg() + + +class DebugCmd(cmd.Cmd, object): + + "CommandLineInterpreter for Debugguer instance" + + color_g = '\033[92m' + color_e = '\033[0m' + color_b = '\033[94m' + color_r = '\033[91m' + + intro = color_g + "=== Miasm2 Debugging shell ===\nIf you need help, " + intro += "type 'help' or '?'" + color_e + prompt = color_b + "$> " + color_e + + def __init__(self, dbg): + "dbg : Debugguer" + self.dbg = dbg + super(DebugCmd, self).__init__() + + # Debug methods + + def print_breakpoints(self): + bp_list = self.dbg.bp_list + if len(bp_list) == 0: + print "No breakpoints." + else: + for i, b in enumerate(bp_list): + print "%d\t0x%08x" % (i, b.addr) + + def print_watchmems(self): + watch_list = self.dbg.mem_watched + if len(watch_list) == 0: + print "No memory watchpoints." 
+ else: + print "Num\tAddress \tSize" + for i, w in enumerate(watch_list): + addr, size = w + print "%d\t0x%08x\t0x%08x" % (i, addr, size) + + def print_registers(self): + regs = self.dbg.get_gpreg_all() + + # Display settings + title1 = "Registers" + title2 = "Values" + max_name_len = max(map(len, regs.keys() + [title1])) + + # Print value table + s = "%s%s | %s" % ( + title1, " " * (max_name_len - len(title1)), title2) + print s + print "-" * len(s) + for name, value in sorted(regs.items(), key=lambda x: x[0]): + print "%s%s | %s" % (name, + " " * (max_name_len - len(name)), + hex(value).replace("L", "")) + + def add_breakpoints(self, bp_addr): + for addr in bp_addr: + if "0x" in addr: + addr = int(addr, 16) + else: + addr = int(addr) + + good = True + for i, dbg_obj in enumerate(self.dbg.bp_list): + if dbg_obj.addr == addr: + good = False + break + if good is False: + print "Breakpoint 0x%08x already set (%d)" % (addr, i) + else: + l = len(self.dbg.bp_list) + self.dbg.add_breakpoint(addr) + print "Breakpoint 0x%08x successfully added ! 
(%d)" % (addr, l) + + display_mode = {"mn": None, + "regs": None, + "newbloc": None} + + def update_display_mode(self): + self.display_mode = {"mn": self.dbg.myjit.jit.log_mn, + "regs": self.dbg.myjit.jit.log_regs, + "newbloc": self.dbg.myjit.jit.log_newbloc} + + # Command line methods + def print_warning(self, s): + print self.color_r + s + self.color_e + + def onecmd(self, line): + cmd_translate = {"h": "help", + "q": "exit", + "e": "exit", + "!": "exec", + "r": "run", + "i": "info", + "b": "breakpoint", + "s": "step", + "d": "dump"} + + if len(line) >= 2 and \ + line[1] == " " and \ + line[:1] in cmd_translate: + line = cmd_translate[line[:1]] + line[1:] + + if len(line) == 1 and line in cmd_translate: + line = cmd_translate[line] + + r = super(DebugCmd, self).onecmd(line) + return r + + def can_exit(self): + return True + + def do_display(self, arg): + if arg == "": + self.help_display() + return + + args = arg.split(" ") + if args[-1].lower() not in ["on", "off"]: + self.print_warning("/!\ %s not in 'on' / 'off'" % args[-1]) + return + mode = args[-1].lower() == "on" + d = {} + for a in args[:-1]: + d[a] = mode + self.dbg.active_trace(**d) + self.update_display_mode() + + def help_display(self): + print "Enable/Disable tracing." + print "Usage: display <mode1> <mode2> ... on|off" + print "Available modes are:" + for k in self.display_mode: + print "\t%s" % k + print "Use 'info display' to get current values" + + def do_watchmem(self, arg): + if arg == "": + self.help_watchmem() + return + + args = arg.split(" ") + if len(args) >= 2: + if "0x" in args[1]: + size = int(args[1], 16) + else: + size = int(args[1]) + else: + size = 0xF + if "0x" in args[0]: + addr = int(args[0], 16) + else: + addr = int(args[0]) + + self.dbg.watch_mem(addr, size) + + def help_watchmem(self): + print "Add a memory watcher." 
+ print "Usage: watchmem <addr> [size]" + print "Use 'info watchmem' to get current memory watchers" + + def do_info(self, arg): + av_info = ["registers", + "display", + "breakpoints", + "watchmem"] + + if arg == "": + print "'info' must be followed by the name of an info command." + print "List of info subcommands:" + for k in av_info: + print "\t%s" % k + + if arg.startswith("b"): + # Breakpoint + self.print_breakpoints() + + if arg.startswith("d"): + # Display + self.update_display_mode() + for k, v in self.display_mode.items(): + print "%s\t\t%s" % (k, v) + + if arg.startswith("w"): + # Watchmem + self.print_watchmems() + + if arg.startswith("r"): + # Registers + self.print_registers() + + def help_info(self): + print "Generic command for showing things about the program being" + print "debugged. Use 'info' without arguments to get the list of" + print "available subcommands." + + def do_breakpoint(self, arg): + if arg == "": + self.help_breakpoint() + else: + addrs = arg.split(" ") + self.add_breakpoints(addrs) + + def help_breakpoint(self): + print "Add breakpoints to argument addresses." + print "Example:" + print "\tbreakpoint 0x11223344" + print "\tbreakpoint 1122 0xabcd" + + def do_step(self, arg): + if arg == "": + nb = 1 + else: + nb = int(arg) + for _ in xrange(nb): + self.dbg.step() + + def help_step(self): + print "Step program until it reaches a different source line." + print "Argument N means do this N times (or till program stops" + print "for another reason)." + + def do_dump(self, arg): + if arg == "": + self.help_dump() + else: + args = arg.split(" ") + if len(args) >= 2: + if "0x" in args[1]: + size = int(args[1], 16) + else: + size = int(args[1]) + else: + size = 0xF + if "0x" in args[0]: + addr = int(args[0], 16) + else: + addr = int(args[0]) + + self.dbg.get_mem(addr, size) + + def help_dump(self): + print "Dump <addr> [size]. Dump size bytes at addr." 
+ + def do_run(self, arg): + self.dbg.run() + + def help_run(self): + print "Launch or continue the current program" + + def do_exit(self, s): + return True + + def do_exec(self, l): + try: + print eval(l) + except Exception, e: + print "*** Error: %s" % e + + def help_exec(self): + print "Exec a python command." + print "You can also use '!' shortcut." + + def help_exit(self): + print "Exit the interpreter." + print "You can also use the Ctrl-D shortcut." + + def help_help(self): + print "Print help" + + def postloop(self): + print '\nGoodbye !' + super(DebugCmd, self).postloop() + + do_EOF = do_exit + help_EOF = help_exit diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py new file mode 100644 index 00000000..4b4832f8 --- /dev/null +++ b/miasm2/analysis/disasm_cb.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from miasm2.core.asmbloc import * +from miasm2.core.utils import * +# from miasm2.core.graph import DiGraph + + +def get_ira(mnemo, attrib): + arch = mnemo.name, attrib + if arch == ("arm", "arm"): + from miasm2.arch.arm.ira import ir_a_arm_base as ira + elif arch == ("x86", 32): + from miasm2.arch.x86.ira import ir_a_x86_32 as ira + elif arch == ("x86", 64): + from miasm2.arch.x86.ira import ir_a_x86_64 as ira + else: + raise ValueError('unknown architecture: %s' % mnemo.name) + return ira + + +def arm_guess_subcall( + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + ira = get_ira(mnemo, attrib) + + sp = asm_symbol_pool() + my_ir = ira(sp) + print '###' + print cur_bloc + my_ir.add_bloc(cur_bloc) + + ir_blocs = my_ir.blocs.values() + # flow_graph = DiGraph() + to_add = set() + for irb in ir_blocs: + # print 'X'*40 + # print irb + pc_val = None + lr_val = None + for exprs in irb.irs: + for e in exprs: + if e.dst == my_ir.pc: + pc_val = e.src + if e.dst == mnemo.regs.LR: + lr_val = e.src + if pc_val 
is None or lr_val is None: + continue + if not isinstance(lr_val, ExprInt): + continue + + l = cur_bloc.lines[-1] + if lr_val.arg != l.offset + l.l: + continue + # print 'IS CALL!' + l = symbol_pool.getby_offset_create(int(lr_val.arg)) + c = asm_constraint_next(l) + + to_add.add(c) + offsets_to_dis.add(int(lr_val.arg)) + + # if to_add: + # print 'R'*70 + for c in to_add: + # print c + cur_bloc.addto(c) + + +def arm_guess_jump_table( + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + ira = get_ira(mnemo, attrib) + + jra = ExprId('jra') + jrb = ExprId('jrb') + + sp = asm_symbol_pool() + my_ir = ira(sp) + my_ir.add_bloc(cur_bloc) + + ir_blocs = my_ir.blocs.values() + for irb in ir_blocs: + # print 'X'*40 + # print irb + pc_val = None + # lr_val = None + for exprs in irb.irs: + for e in exprs: + if e.dst == my_ir.pc: + pc_val = e.src + # if e.dst == mnemo.regs.LR: + # lr_val = e.src + if pc_val is None: + continue + if not isinstance(pc_val, ExprMem): + continue + assert(pc_val.size == 32) + print pc_val + ad = pc_val.arg + ad = expr_simp(ad) + print ad + res = MatchExpr(ad, jra + jrb, set([jra, jrb])) + if res is False: + raise NotImplementedError('not fully functional') + print res + if not isinstance(res[jrb], ExprInt): + raise NotImplementedError('not fully functional') + base_ad = int(res[jrb].arg) + print base_ad + addrs = set() + i = -1 + max_table_entry = 10000 + max_diff_addr = 0x100000 # heuristic + while i < max_table_entry: + i += 1 + try: + ad = upck32(pool_bin.getbytes(base_ad + 4 * i, 4)) + except: + break + if abs(ad - base_ad) > max_diff_addr: + break + addrs.add(ad) + print [hex(x) for x in addrs] + + for ad in addrs: + offsets_to_dis.add(ad) + l = symbol_pool.getby_offset_create(ad) + c = asm_constraint_to(l) + cur_bloc.addto(c) + +guess_funcs = [] + + +def guess_multi_cb( + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + for f in guess_funcs: + f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) 
diff --git a/miasm2/analysis/gdbserver.py b/miasm2/analysis/gdbserver.py new file mode 100644 index 00000000..8d0135e9 --- /dev/null +++ b/miasm2/analysis/gdbserver.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import socket +import struct +import time +import logging +from StringIO import StringIO +import miasm2.analysis.debugging as debugging +from miasm2.jitter.jitload import ExceptionHandle + + +class GdbServer(object): + + "Debugguer binding for GDBServer protocol" + + general_registers_order = [] + general_registers_size = {} # RegName : Size in octet + status = "S05" + + def __init__(self, dbg, port=4455): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('localhost', port)) + server.listen(1) + self.server = server + self.dbg = dbg + + # Communication methods + + def compute_checksum(self, data): + return chr(sum(map(ord, data)) % 256).encode("hex") + + def get_messages(self): + all_data = "" + data = self.sock.recv(4096) + all_data += data + while (len(data) == 4096 or data == ""): + if data == "": + # Avoid consuming CPU + time.sleep(0.001) + continue + data = self.sock.recv(4096) + all_data += data + + logging.debug("<- %r" % all_data) + self.recv_queue += self.parse_messages(all_data) + + def parse_messages(self, data): + buf = StringIO(data) + + msgs = [] + + while (buf.tell() < buf.len): + token = buf.read(1) + if token == "+": + continue + if token == "-": + raise NotImplementedError("Resend packet") + if token == "$": + packet_data = "" + c = buf.read(1) + while c != "#": + packet_data += c + c = buf.read(1) + checksum = buf.read(2) + if checksum != self.compute_checksum(packet_data): + raise ValueError("Incorrect checksum") + + msgs.append(packet_data) + + return msgs + + def send_string(self, s): + self.send_queue.append("O" + s.encode("hex")) + + def process_messages(self): + + while self.recv_queue: + msg = self.recv_queue.pop(0) + 
buf = StringIO(msg) + msg_type = buf.read(1) + + self.send_queue.append("+") + + if msg_type == "q": + if msg.startswith("qSupported"): + self.send_queue.append("PacketSize=3fff") + elif msg.startswith("qC"): + # Current thread + self.send_queue.append("") + elif msg.startswith("qAttached"): + # Not supported + self.send_queue.append("") + elif msg.startswith("qTStatus"): + # Not supported + self.send_queue.append("") + elif msg.startswith("qfThreadInfo"): + # Not supported + self.send_queue.append("") + else: + raise NotImplementedError() + + elif msg_type == "H": + # Set current thread + self.send_queue.append("OK") + + elif msg_type == "?": + # Report why the target halted + self.send_queue.append(self.status) # TRAP signal + + elif msg_type == "g": + # Report all general register values + self.send_queue.append(self.report_general_register_values()) + + elif msg_type == "p": + # Read a specific register + reg_num = int(buf.read(), 16) + self.send_queue.append(self.read_register(reg_num)) + + elif msg_type == "P": + # Set a specific register + reg_num, value = buf.read().split("=") + reg_num = int(reg_num, 16) + value = int(value.decode("hex")[::-1].encode("hex"), 16) + self.set_register(reg_num, value) + self.send_queue.append("OK") + + elif msg_type == "m": + # Read memory + addr, size = map(lambda x: int(x, 16), buf.read().split(",")) + self.send_queue.append(self.read_memory(addr, size)) + + elif msg_type == "k": + # Kill + self.sock.close() + exit(1) + + elif msg_type == "!": + # Extending debugging will be used + self.send_queue.append("OK") + + elif msg_type == "v": + if msg == "vCont?": + # Is vCont supported ? 
+ self.send_queue.append("") + + elif msg_type == "s": + # Step + self.dbg.step() + self.send_queue.append("S05") # TRAP signal + + elif msg_type == "Z": + # Add breakpoint or watchpoint + bp_type = buf.read(1) + if bp_type == "0": + # Exec breakpoint + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + if size != 1: + raise NotImplementedError("Bigger size") + self.dbg.add_breakpoint(addr) + self.send_queue.append("OK") + + elif bp_type == "1": + # Hardware BP + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + self.dbg.add_memory_breakpoint(addr, size, + read=True, + write=True) + self.send_queue.append("OK") + + elif bp_type in ["2", "3", "4"]: + # Memory breakpoint + assert(buf.read(1) == ",") + read = bp_type in ["3", "4"] + write = bp_type in ["2", "4"] + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + self.dbg.add_memory_breakpoint(addr, size, + read=read, + write=write) + self.send_queue.append("OK") + + else: + raise ValueError("Impossible value") + + elif msg_type == "z": + # Remove breakpoint or watchpoint + bp_type = buf.read(1) + if bp_type == "0": + # Exec breakpoint + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + if size != 1: + raise NotImplementedError("Bigger size") + dbgsoft = self.dbg.get_breakpoint_by_addr(addr) + assert(len(dbgsoft) == 1) + self.dbg.remove_breakpoint(dbgsoft[0]) + self.send_queue.append("OK") + + elif bp_type == "1": + # Hardware BP + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + self.dbg.remove_memory_breakpoint_by_addr_access( + addr, read=True, write=True) + self.send_queue.append("OK") + + elif bp_type in ["2", "3", "4"]: + # Memory breakpoint + assert(buf.read(1) == ",") + read = bp_type in ["3", "4"] + write = bp_type in ["2", "4"] + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + 
self.dbg.remove_memory_breakpoint_by_addr_access( + addr, read=read, write=write) + self.send_queue.append("OK") + + else: + raise ValueError("Impossible value") + + elif msg_type == "c": + # Continue + self.status = "" + self.send_messages() + ret = self.dbg.run() + if isinstance(ret, debugging.DebugBreakpointSoft): + self.status = "S05" + self.send_queue.append("S05") # TRAP signal + elif isinstance(ret, ExceptionHandle): + if ret == ExceptionHandle.memoryBreakpoint(): + self.status = "S05" + self.send_queue.append("S05") + else: + raise NotImplementedError("Unknown Except") + else: + raise NotImplementedError() + + else: + raise NotImplementedError( + "Not implemented: message type '%s'" % msg_type) + + def send_messages(self): + for msg in self.send_queue: + if msg == "+": + data = "+" + else: + data = "$%s#%s" % (msg, self.compute_checksum(msg)) + logging.debug("-> %r" % data) + self.sock.send(data) + self.send_queue = [] + + def main_loop(self): + self.recv_queue = [] + self.send_queue = [] + + self.send_string("Test\n") + + while (self.sock): + self.get_messages() + self.process_messages() + self.send_messages() + + def run(self): + self.sock, self.address = self.server.accept() + self.main_loop() + + # Debugguer processing methods + def report_general_register_values(self): + s = "" + for i in xrange(len(self.general_registers_order)): + s += self.read_register(i) + return s + + def read_register(self, reg_num): + reg_name = self.general_registers_order[reg_num] + reg_value = self.read_register_by_name(reg_name) + size = self.general_registers_size[reg_name] + + pack_token = "" + if size == 1: + pack_token = "<B" + elif size == 2: + pack_token = "<H" + elif size == 4: + pack_token = "<I" + elif size == 8: + pack_token = "<Q" + else: + raise NotImplementedError("Unknown size") + + return struct.pack(pack_token, reg_value).encode("hex") + + def set_register(self, reg_num, value): + reg_name = self.general_registers_order[reg_num] + 
self.dbg.set_reg_value(reg_name, value) + + def read_register_by_name(self, reg_name): + return self.dbg.get_reg_value(reg_name) + + def read_memory(self, addr, size): + except_flag_vm = self.dbg.myjit.vm.vm_get_exception() + try: + return self.dbg.get_mem_raw(addr, size).encode("hex") + except RuntimeError: + self.dbg.myjit.vm.vm_set_exception(except_flag_vm) + return "00" * size + + +class GdbServer_x86_32(GdbServer): + + "Extend GdbServer for x86 32bits purposes" + + general_registers_order = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", + "EDI", "EIP", "EFLAGS", "CS", "SS", "DS", "ES", + "FS", "GS"] + + general_registers_size = {"EAX": 4, + "ECX": 4, + "EDX": 4, + "EBX": 4, + "ESP": 4, + "EBP": 4, + "ESI": 4, + "EDI": 4, + "EIP": 4, + "EFLAGS": 2, + "CS": 2, + "SS": 2, + "DS": 2, + "ES": 2, + "FS": 2, + "GS": 2} + + register_ignore = [ + "tf", "i_f", "nt", "rf", "vm", "ac", "vif", "vip", "i_d"] + + def read_register_by_name(self, reg_name): + sup_func = super(GdbServer_x86_32, self).read_register_by_name + if reg_name == "EFLAGS": + val = 0 + eflags_args = [ + "cf", 1, "pf", 0, "af", 0, "zf", "nf", "tf", "i_f", "df", "of"] + eflags_args += ["nt", 0, "rf", "vm", "ac", "vif", "vip", "i_d"] + eflags_args += [0] * 10 + + for i, arg in enumerate(eflags_args): + if isinstance(arg, str): + if arg not in self.register_ignore: + to_add = sup_func(arg) + else: + to_add = 0 + else: + to_add = arg + + val |= (to_add << i) + return val + else: + return sup_func(reg_name) + + +class GdbServer_msp430(GdbServer): + + "Extend GdbServer for msp430 purposes" + + general_registers_order = ["PC", "SP", "SR", "R3", "R4", "R5", "R6", "R7", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", + "R15"] + + general_registers_size = {"PC": 2, + "SP": 2, + "SR": 2, + "R3": 2, + "R2": 2, + "R5": 2, + "R6": 2, + "R7": 2, + "R8": 2, + "R9": 2, + "R10": 2, + "R11": 2, + "R12": 2, + "R13": 2, + "R12": 2, + "R15": 2} + + def read_register_by_name(self, reg_name): + sup_func = 
super(GdbServer_msp430, self).read_register_by_name + if reg_name == "SR": + o = sup_func('res') + o <<= 1 + o |= sup_func('of') + o <<= 1 + o |= sup_func('scg1') + o <<= 1 + o |= sup_func('scg0') + o <<= 1 + o |= sup_func('osc') + o <<= 1 + o |= sup_func('cpuoff') + o <<= 1 + o |= sup_func('gie') + o <<= 1 + o |= sup_func('nf') + o <<= 1 + o |= sup_func('zf') + o <<= 1 + o |= sup_func('cf') + + return o + else: + return sup_func(reg_name) + |