diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/qemu_tool.py | 233 | ||||
| -rw-r--r-- | tools/verify_qemu.py (renamed from tools/invoke_qemu_tool.py) | 47 |
2 files changed, 134 insertions, 146 deletions
diff --git a/tools/qemu_tool.py b/tools/qemu_tool.py index c7730fd..d670692 100644 --- a/tools/qemu_tool.py +++ b/tools/qemu_tool.py @@ -3,134 +3,123 @@ gdb -n --batch -x qemu_tool.py """ -import argparse -import re -import shlex -import subprocess -from typing import TextIO +import gdb +import platform import focaccia.parser as parser -from focaccia.arch import x86 -from focaccia.lldb_target import MemoryMap +from focaccia.arch import supported_architectures, Arch +from focaccia.compare import compare_symbolic, ErrorTypes from focaccia.snapshot import ProgramState - -def parse_memory_maps(stream: TextIO) -> tuple[list[MemoryMap], str]: - """ - :return: Returns the list of parsed memory mappings as well as the first - line in the stream that does not belong to the memory mapping - information, i.e. the line that terminates the block of mapping - information. - The line is returned for the technical reason that the parser - needs to read a line from the stream in order to determine that - this line does no longer belong to the mapping information; but it - might still contain other important information. - """ - mappings = [] - while True: - line = stream.readline() - split = line.split(' ') - if len(split) != 3 or not re.match('^[0-9a-f]+-[0-9a-f]+$', split[0]): - return mappings, line - - addr_range, size, perms = split - start, end = addr_range.split('-') - start, end = int(start, 16), int(end, 16) - mappings.append(MemoryMap(start, end, '[unnamed]', perms)) - -def copy_memory(proc, state: ProgramState, maps: list[MemoryMap]): - """Copy memory from a GDB process to a ProgramState object. - - Problem: Reading large mappings via GDB takes way too long (~500ms for ~8MB). - """ - for mapping in maps: - # Only copy read- and writeable memory from the process. This is a - # heuristic to try to copy only heap and stack. - if 'rw' not in mapping.perms: - continue - - map_size = mapping.end_address - mapping.start_address - mem = proc.read_memory(mapping.start_address, map_size) - assert(mem.contiguous) - assert(mem.nbytes == len(mem.tobytes())) - assert(mem.nbytes == map_size) - state.write_memory(mapping.start_address, mem.tobytes()) - -def run_gdb(qemu_log: TextIO, qemu_port: int) -> list[ProgramState]: - import gdb - - gdb.execute('set pagination 0') - gdb.execute('set sysroot') - gdb.execute(f'target remote localhost:{qemu_port}') - process = gdb.selected_inferior() - - arch = x86.ArchX86() - mappings: list[MemoryMap] = [] - states: list[ProgramState] = [] - - while process.is_valid() and len(process.threads()) > 0: - for line in qemu_log: - if re.match('^start +end +size +prot$', line): - mappings, line = parse_memory_maps(qemu_log) - - if line.startswith('Trace'): - states.append(ProgramState(arch)) - copy_memory(process, states[-1], mappings) - continue - - if states: - parser._parse_qemu_line(line, states[-1]) - - gdb.execute('si', to_string=True) +from focaccia.symbolic import SymbolicTransform, eval_symbol +from focaccia.utils import print_result + +from verify_qemu import make_argparser + +class GDBProgramState: + def __init__(self, process: gdb.Inferior, frame: gdb.Frame): + self._proc = process + self._frame = frame + + def read_register(self, regname: str) -> int | None: + try: + return int(self._frame.read_register(regname.lower())) + except ValueError as err: + from focaccia.arch import x86 + rflags = int(self._frame.read_register('eflags')) + rflags = x86.decompose_rflags(rflags) + if regname in rflags: + return rflags[regname] + + print(f'{regname}: {err}') + return None + + def read_memory(self, addr: int, size: int) -> bytes | None: + try: + return self._proc.read_memory(addr, size).tobytes() + except gdb.MemoryError as err: + print(f'@{size}[{hex(addr)}]: {err}') + return None + +class GDBServerStateIterator: + def __init__(self, address: str, port: int): + gdb.execute('set pagination 0') + gdb.execute('set sysroot') + gdb.execute(f'target remote {address}:{port}') + self._process = gdb.selected_inferior() + self._first_next = True + + def __iter__(self): + return self + + def __next__(self): + # The first call to __next__ should yield the first program state, + # i.e. before stepping the first time + if self._first_next: + self._first_next = False + return GDBProgramState(self._process, gdb.selected_frame()) + + # Step + pc = gdb.selected_frame().read_register('pc') + new_pc = pc + while pc == new_pc: + gdb.execute('si', to_string=True) + if not self._process.is_valid() or len(self._process.threads()) == 0: + raise StopIteration + new_pc = gdb.selected_frame().read_register('pc') + + return GDBProgramState(self._process, gdb.selected_frame()) + +def collect_conc_trace(arch: Arch, \ + gdb: GDBServerStateIterator, \ + strace: list[SymbolicTransform]) \ + -> list[ProgramState]: + states = [] + for qemu, transform in zip(gdb, strace): + qemu_pc = qemu.read_register('pc') + assert(qemu_pc is not None) + + if qemu_pc != transform.addr: + print(f'Fatal error: QEMU\'s program counter' + f' ({hex(qemu_pc)}) does not match the' + f' expected program counter in the symbolic trace' + f' ({hex(transform.addr)}).') + print(f'Processing only partial trace up to this instruction.') + return states + + state = ProgramState(arch) + state.set_register('PC', transform.addr) + + accessed_regs = transform.get_used_registers() + accessed_mems = transform.get_used_memory_addresses() + for regname in accessed_regs: + regval = qemu.read_register(regname) + if regval is not None: + state.set_register(regname, regval) + for mem in accessed_mems: + assert(mem.size % 8 == 0) + addr = eval_symbol(mem.ptr, qemu) + mem = qemu.read_memory(addr, int(mem.size / 8)) + if mem is not None: + state.write_memory(addr, mem) + states.append(state) return states -def make_argparser(): - prog = argparse.ArgumentParser() - prog.add_argument('binary', - type=str, - help='The binary to run and record.') - prog.add_argument('--binary-args', - type=str, - help='A string of arguments to be passed to the binary.') - prog.add_argument('--output', '-o', help='Name of output file.') - prog.add_argument('--gdbserver-port', type=int, default=12421) - prog.add_argument('--qemu', type=str, default='qemu-x86_64', - help='QEMU binary to invoke. [Default: qemu-x86_64') - prog.add_argument('--qemu-log', type=str, default='qemu.log') - prog.add_argument('--qemu-extra-args', type=str, default='', - help='Arguments passed to QEMU in addition to the' - ' default ones required by this script.') - return prog - -if __name__ == "__main__": +def main(): args = make_argparser().parse_args() - binary = args.binary - binary_args = shlex.split(args.binary_args) if args.binary_args else '' - - qemu_bin = args.qemu gdbserver_port = args.gdbserver_port - qemu_log_name = args.qemu_log - qemu_args = [ - qemu_bin, - '--trace', 'target_mmap*', - '--trace', 'memory_notdirty_*', - # We write QEMU's output to a log file, then read it from that file. - # This is preferred over reading from the process's stdout pipe because - # we require a non-blocking solution that returns when all available - # lines have been read. - '-D', qemu_log_name, - '-d', 'cpu,fpu,exec,unimp,page,strace', - '-g', str(gdbserver_port), - *shlex.split(args.qemu_extra_args), - binary, - *binary_args, - ] - - qemu = subprocess.Popen(qemu_args) - - with open(qemu_log_name, 'r') as qemu_log: - snapshots = run_gdb(qemu_log, gdbserver_port) - - with open(args.output, 'w') as file: - parser.serialize_snapshots(snapshots, file) + with open(args.symb_trace, 'r') as strace: + symb_transforms = parser.parse_transformations(strace) + + arch = supported_architectures[platform.machine()] + conc_states = collect_conc_trace( + arch, + GDBServerStateIterator('localhost', gdbserver_port), + symb_transforms) + + res = compare_symbolic(conc_states, symb_transforms) + print_result(res, ErrorTypes.POSSIBLE) + +if __name__ == "__main__": + main() diff --git a/tools/invoke_qemu_tool.py b/tools/verify_qemu.py index 152c208..98437cb 100644 --- a/tools/invoke_qemu_tool.py +++ b/tools/verify_qemu.py @@ -1,39 +1,32 @@ """ -This mechanism exists to retrieve per-instruction program snapshots from QEMU, -specifically including memory dumps. This is surprisingly nontrivial (we don't -have a log option like `-d memory`), and the mechanism we have implemented to -achieve this is accordingly complicated. - -In short: We use QEMU's feature to interact with the emulation via a GDB server -interface together with parsing QEMU's logs to record register and memory state -at single-instruction intervals. - -We need QEMU's log in addition to the GDB server because QEMU's GDB server does -not support querying memory mapping information. We need this information to -know from where we need to read memory, so we parse memory mappings from the -log (option `-d page`). +Spawn GDB, connect to QEMU's GDB server, and read test states from that. We need two scripts (this one and the primary `qemu_tool.py`) because we can't pass arguments to scripts executed via `gdb -x <script>`. -This script (`invoke_qemu_tool.py`) is the one the user interfaces with. It +This script (`verify_qemu.py`) is the one the user interfaces with. It eventually calls `execv` to spawn a GDB process that calls the main -`qemu_tool.py` script; `python invoke_qemu_tool.py` essentially behaves as if +`qemu_tool.py` script; `python verify_qemu.py` essentially behaves as if something like `gdb --batch -x qemu_tool.py` were executed instead. Before it starts GDB, though, it parses command line arguments and applies some weird but necessary logic to pass them to `qemu_tool.py`. - -The main script `qemu_tool.py`, which runs inside of GDB, finally forks a QEMU -instance that provides a GDB server and writes its logs to a file. It then -connects GDB to that server and incrementally reads the QEMU logs while -stepping through the program. Doing that, it generates program snapshots at -each instruction. """ +import argparse import os +import subprocess import sys -from qemu_tool import make_argparser +def make_argparser(): + """This is also used by the GDB-invoked script to parse its args.""" + prog = argparse.ArgumentParser() + prog.add_argument('--symb-trace', + required=True, + help='A symbolic transformation trace to be used for' \ + ' verification.') + prog.add_argument('--output', '-o', help='Name of output file.') + prog.add_argument('gdbserver_port', type=int) + return prog def quoted(s: str) -> str: return f'"{s}"' @@ -67,12 +60,18 @@ if __name__ == "__main__": # mechanism to pass arguments to a script that it executes, so we # overwrite `sys.argv` manually before invoking the script. argv_str = f'[{", ".join(quoted(a) for a in argv)}]' + path_str = f'[{", ".join(quoted(s) for s in sys.path)}]' - os.execv(args.gdb, [ + gdb_cmd = [ args.gdb, '-nx', # Don't parse any .gdbinits '--batch-silent' if args.quiet else '--batch', '-ex', f'py import sys', '-ex', f'py sys.argv = {argv_str}', + '-ex', f'py sys.path = {path_str}', '-x', qemu_tool_path - ]) + ] + proc = subprocess.Popen(gdb_cmd) + + ret = proc.wait() + exit(ret) |