diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-11-13 18:47:40 +0000 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-11-14 15:14:56 +0000 |
| commit | 6247fb055a5c1e3eddb9948b3abf4c1e766edc08 (patch) | |
| tree | b0ad9c0fbc0aea1080d646fa96d5346fef99baa4 | |
| parent | d103a0532b3cb7c73a2522022ff6c971af98f650 (diff) | |
| download | focaccia-6247fb055a5c1e3eddb9948b3abf4c1e766edc08.tar.gz focaccia-6247fb055a5c1e3eddb9948b3abf4c1e766edc08.zip | |
Refactor event tracing in QEMU tool
| -rw-r--r-- | src/focaccia/arch/x86.py | 13 | ||||
| -rw-r--r-- | src/focaccia/compare.py | 2 | ||||
| -rw-r--r-- | src/focaccia/deterministic.py | 17 | ||||
| -rw-r--r-- | src/focaccia/native/tracer.py | 4 | ||||
| -rw-r--r-- | src/focaccia/qemu/_qemu_tool.py | 208 | ||||
| -rw-r--r-- | src/focaccia/qemu/deterministic.py | 9 | ||||
| -rw-r--r-- | src/focaccia/qemu/syscall.py | 14 | ||||
| -rw-r--r-- | src/focaccia/qemu/x86.py | 10 | ||||
| -rw-r--r-- | src/focaccia/snapshot.py | 2 | ||||
| -rwxr-xr-x | src/focaccia/tools/validate_qemu.py | 3 |
10 files changed, 182 insertions, 100 deletions
diff --git a/src/focaccia/arch/x86.py b/src/focaccia/arch/x86.py index b33c6bf..4609f4c 100644 --- a/src/focaccia/arch/x86.py +++ b/src/focaccia/arch/x86.py @@ -183,18 +183,6 @@ def compose_rflags(rflags: dict[str, int]) -> int: (0x00200000 if rflags.get('ID', 0) else 0) ) -# Incomplete, only the most common ones -emulatedSyscalls = { - 34: _Sc('pause', []), - 39: _Sc('getpid', []), - 102: _Sc('getuid', []), - 318: _Sc('getrandom', [('rdi', 'rsi', 'char')]), -} - -# Focaccia will do scheduling (and locking ???) -passthruSyscalls = { -} - class ArchX86(Arch): def __init__(self): super().__init__(archname, registers, 64) @@ -235,3 +223,4 @@ class ArchX86(Arch): def get_syscall_reg(self) -> str: return 'rax' + diff --git a/src/focaccia/compare.py b/src/focaccia/compare.py index 4fea451..3aa5ab2 100644 --- a/src/focaccia/compare.py +++ b/src/focaccia/compare.py @@ -34,7 +34,7 @@ def _calc_transformation(previous: ProgramState, current: ProgramState): try: prev_val = previous.read_register(reg) cur_val = current.read_register(reg) - transformation.set_register(reg, cur_val - prev_val) + transformation.write_register(reg, cur_val - prev_val) except RegisterAccessError: # Register is not set in either state pass diff --git a/src/focaccia/deterministic.py b/src/focaccia/deterministic.py index 91afd4e..6d76457 100644 --- a/src/focaccia/deterministic.py +++ b/src/focaccia/deterministic.py @@ -1,6 +1,7 @@ from .arch import Arch from .snapshot import ReadableProgramState +from reprlib import repr as alt_repr from typing import Callable class MemoryWriteHole: @@ -56,7 +57,7 @@ class Event: mem_write_repr = '' for mem_write in self.mem_writes: - mem_write_repr += f'{mem_write}\n' + mem_write_repr += f'{alt_repr(mem_write)}\n' repr_str = f'Thread {hex(self.tid)} executed event {self.event_type} at {hex(self.pc)}\n' repr_str += f'Register set:\n{reg_repr}' @@ -306,6 +307,13 @@ finally: return None return self._events[self._idx] + def next_event(self) -> Event | None: + if self._idx is None: + raise ValueError('Attempted to get next event without synchronizing') + if self._idx + 1 >= len(self._events): + return None + return self._events[self._idx+1] + def update(self, target: ReadableProgramState) -> Event | None: # Quick check candidates = self._pc_to_event.get(target.read_pc(), []) @@ -321,13 +329,14 @@ finally: self._in_event = True return self.current_event() - return self.next() + return self.update_to_next() - def next(self) -> Event | None: + def update_to_next(self, count: int = 1) -> Event | None: if self._idx is None: raise ValueError('Attempted to get next event without synchronizing') - self._idx += 1 + self._in_event = True + self._idx += count return self.current_event() def __bool__(self) -> bool: diff --git a/src/focaccia/native/tracer.py b/src/focaccia/native/tracer.py index b369b22..4376f41 100644 --- a/src/focaccia/native/tracer.py +++ b/src/focaccia/native/tracer.py @@ -217,12 +217,12 @@ class SymbolicTracer: self.target.run() debug(f'Completed handling event: {current_event}') - self.nondet_events.next() + self.nondet_events.update_to_next() def is_stepping_instr(self, instruction: Instruction) -> bool: if self.nondet_events.current_event(): debug('Current instruction matches next event; stepping through it') - self.nondet_events.next() + self.nondet_events.update_to_next() return True else: if self.target.arch.is_instr_syscall(str(instruction)): diff --git a/src/focaccia/qemu/_qemu_tool.py b/src/focaccia/qemu/_qemu_tool.py index ccdf2fb..75b142e 100644 --- a/src/focaccia/qemu/_qemu_tool.py +++ b/src/focaccia/qemu/_qemu_tool.py @@ -19,7 +19,8 @@ from focaccia.snapshot import ProgramState, ReadableProgramState, \ from focaccia.symbolic import SymbolicTransform, eval_symbol, ExprMem from focaccia.trace import Trace, TraceEnvironment from focaccia.utils import print_result -from focaccia.deterministic import DeterministicLog, Event +from focaccia.deterministic import DeterministicLog, DeterministicEventIterator, Event, SyscallEvent +from focaccia.qemu.deterministic import emulated_system_calls from focaccia.tools.validate_qemu import make_argparser, verbosity @@ -37,7 +38,13 @@ qemu_crash = { 'snap': None, } -class GDBProgramState(ReadableProgramState): +def match_event(event: Event, target: ReadableProgramState) -> bool: + # Match just on PC + if event.pc == target.read_pc(): + return True + return False + +class GDBProgramState(ProgramState): from focaccia.arch import aarch64, x86 flag_register_names = { @@ -122,13 +129,12 @@ class GDBProgramState(ReadableProgramState): raise MemoryAccessError(addr, size, str(err)) class GDBServerStateIterator: - def __init__(self, remote: str, deterministic_log: list[Event] | None): + def __init__(self, remote: str, deterministic_log: DeterministicLog): gdb.execute('set pagination 0') gdb.execute('set sysroot') gdb.execute('set python print-stack full') # enable complete Python tracebacks gdb.execute(f'target remote {remote}') self._deterministic_log = deterministic_log - self._deterministic_idx = 0 self._process = gdb.selected_inferior() self._first_next = True @@ -138,96 +144,129 @@ class GDBServerStateIterator: archname = split[1] if len(split) > 1 else split[0] archname = archname.replace('-', '_') if archname not in supported_architectures: - print(f'Error: Current platform ({archname}) is not' - f' supported by Focaccia. Exiting.') - exit(1) + raise NotImplementedError(f'Platform {archname} is not supported by Focaccia') self.arch = supported_architectures[archname] self.binary = self._process.progspace.filename - def _handle_sync_point(self, call: int, addr: int, length: int, arch: Arch): - def _search_next_event(addr: int, idx: int) -> Event | None: - if self._deterministic_log is None: - return idx, None - for i in range(idx, len(self._deterministic_log)): - event = self._deterministic_log[i] - if event.pc == addr: - return i, event - return idx, None - - _new_pc = addr + length - print(f'Handling syscall at {hex(_new_pc)} with call number {call}') - if int(call) in arch.get_em_syscalls().keys(): - - #print(f'Events: {self._deterministic_log[self._deterministic_idx:]}') - i, e = _search_next_event(_new_pc, self._deterministic_idx) - if e is None: - raise Exception(f'No matching event found in deterministic log \ - for syscall at {hex(_new_pc)}') - - e = self._deterministic_log[i+1] - print(f'Adjusting w/ Event: {e}') - gdb.execute(f'set $pc = {hex(_new_pc)}') - self._deterministic_idx = i+2 - - reg_name = arch.get_syscall_reg() - gdb.execute(f'set $rax = {hex(e.registers.get("{reg_name}", 0))}') - - assert(len(arch.get_em_syscalls()[int(call)].outputs) == len(e.mem_writes)) - - w_idx = 0 - for _reg, _size, _type in arch.get_em_syscalls()[int(call)].outputs: - if arch.to_regname(_size) is not None: - _size = e.registers[_size] - else: - _size = int(_size) - - _addr_rr = e.registers[_reg] - _w_rr = e.mem_writes[w_idx] - w_idx += 1 - - assert (_size == _w_rr.size), f'{_size} != {_w_rr.size}' - _addr = gdb.selected_frame().read_register(_reg) - cmd = f'set {{char[{_size}]}}{hex(_addr)} = 0x{_w_rr.data.hex()}' - # print(f'GDB: {cmd}') - gdb.execute(cmd) - - return _new_pc - - return addr + self._deterministic_events = DeterministicEventIterator(self._deterministic_log, match_event) + + # Filter non-deterministic events for event after start + self._deterministic_events.update(self.current_state()) + self._deterministic_events.update_to_next() + + def current_state(self) -> ReadableProgramState: + return GDBProgramState(self._process, gdb.selected_frame(), self.arch) + + def _handle_syscall(self) -> GDBProgramState: + cur_event = self._deterministic_events.current_event() + call = cur_event.registers.get(self.arch.get_syscall_reg()) + + post_event = self._deterministic_events.update_to_next() + syscall = emulated_system_calls[self.arch.archname].get(call, None) + debug(f'Handling event:\n{cur_event}') + if syscall is not None: + info(f'Replaying system call number {hex(call)}') + + self.skip(post_event.pc) + next_state = GDBProgramState(self._process, gdb.selected_frame(), self.arch) + + patchup_regs = [self.arch.get_syscall_reg(), *(syscall.patchup_registers or [])] + for reg in patchup_regs: + next_state.write_register(reg, post_event.registers.get(reg)) + + for mem in post_event.mem_writes: + # TODO: handle holes + # TODO: address mapping + addr, data = mem.address, mem.data + next_state.write_memory(addr, data) + + return next_state + + info(f'System call number {hex(call)} not replayed') + self._step() + if self._is_exited(): + raise StopIteration + return GDBProgramState(self._process, gdb.selected_frame(), self.arch) + + def _handle_event(self) -> GDBProgramState: + current_event = self._deterministic_events.current_event() + if not current_event: + return self.current_state() + + if isinstance(current_event, SyscallEvent): + return self._handle_syscall() + + warn(f'Event handling for events of type {current_event.event_type} not implemented') + return self.current_state() + + def _is_exited(self) -> bool: + return not self._process.is_valid() or len(self._process.threads()) == 0 def __iter__(self): return self - def __next__(self): + def __next__(self) -> ReadableProgramState: # The first call to __next__ should yield the first program state, # i.e. before stepping the first time if self._first_next: self._first_next = False return GDBProgramState(self._process, gdb.selected_frame(), self.arch) + if match_event(self._deterministic_events.current_event(), self.current_state()): + state = self._handle_event() + if self._is_exited(): + raise StopIteration + self._deterministic_events.update_to_next() + return state + # Step pc = gdb.selected_frame().read_register('pc') new_pc = pc while pc == new_pc: # Skip instruction chains from REP STOS etc. - gdb.execute('si', to_string=True) - if not self._process.is_valid() or len(self._process.threads()) == 0: + self._step() + if self._is_exited(): raise StopIteration new_pc = gdb.selected_frame().read_register('pc') - if self._deterministic_log is not None: - asm = gdb.selected_frame().architecture().disassemble(new_pc, count=1)[0] - if 'syscall' in asm['asm']: - call_reg = self.arch.get_syscall_reg() - new_pc = self._handle_sync_point(gdb.selected_frame().read_register(call_reg), asm['addr'], asm['length'], self.arch) - return GDBProgramState(self._process, gdb.selected_frame(), self.arch) + return self.current_state() + + def run_until(self, addr: int) -> ReadableProgramState: + events_handled = 0 + event = self._deterministic_events.current_event() + while event: + state = self._run_until_any([addr, event.pc]) + if state.read_pc() == addr: + # Check if we started at the very _start + self._first_next = events_handled == 0 + return state + + self._handle_event() + + event = self._deterministic_events.update_to_next() + events_handled += 1 + return self._run_until_any([addr]) + + def _run_until_any(self, addresses: list[int]) -> ReadableProgramState: + info(f'Executing until {[hex(x) for x in addresses]}') + + breakpoints = [] + for addr in addresses: + breakpoints.append(gdb.Breakpoint(f'*{addr:#x}')) - def run_until(self, addr: int): - breakpoint = gdb.Breakpoint(f'*{addr:#x}') gdb.execute('continue') - breakpoint.delete() + + for bp in breakpoints: + bp.delete() + return GDBProgramState(self._process, gdb.selected_frame(), self.arch) + def skip(self, new_pc: int): + gdb.execute(f'set $pc = {hex(new_pc)}') + + def _step(self): + gdb.execute('si', to_string=True) + def record_minimal_snapshot(prev_state: ReadableProgramState, cur_state: ReadableProgramState, prev_transform: SymbolicTransform, @@ -270,7 +309,7 @@ def record_minimal_snapshot(prev_state: ReadableProgramState, for regname in regs: try: regval = cur_state.read_register(regname) - out_state.set_register(regname, regval) + out_state.write_register(regname, regval) except RegisterAccessError: pass for mem in mems: @@ -283,7 +322,7 @@ def record_minimal_snapshot(prev_state: ReadableProgramState, pass state = ProgramState(cur_transform.arch) - state.set_register('PC', cur_transform.addr) + state.write_register('PC', cur_transform.addr) set_values(prev_transform.changed_regs.keys(), get_written_addresses(prev_transform), @@ -334,24 +373,33 @@ def collect_conc_trace(gdb: GDBServerStateIterator, \ cur_state = next(state_iter) symb_i = 0 + if logger.isEnabledFor(logging.DEBUG): + debug('Tracing program with the following non-deterministic events:') + for event in gdb._deterministic_events.events(): + debug(event) + # Skip to start + pc = cur_state.read_register('pc') + start_addr = start_addr if start_addr else pc try: - pc = cur_state.read_register('pc') - if start_addr and pc != start_addr: - info(f'Tracing QEMU from starting address: {hex(start_addr)}') + if pc != start_addr: + info(f'Executing until starting address {hex(start_addr)}') cur_state = state_iter.run_until(start_addr) except Exception as e: - if start_addr: + if pc != start_addr: raise Exception(f'Unable to reach start address {hex(start_addr)}: {e}') raise Exception(f'Unable to trace: {e}') # An online trace matching algorithm. + info(f'Tracing QEMU between {hex(start_addr)}:{hex(stop_addr) if stop_addr else "end"}') while True: try: pc = cur_state.read_register('pc') + if stop_addr and pc == stop_addr: + break while pc != strace[symb_i].addr: - info(f'PC {hex(pc)} does not match next symbolic reference {hex(strace[symb_i].addr)}') + warn(f'PC {hex(pc)} does not match next symbolic reference {hex(strace[symb_i].addr)}') next_i = find_index(strace[symb_i+1:], pc, lambda t: t.addr) @@ -409,11 +457,13 @@ def main(): logging_level = getattr(logging, args.error_level.upper(), logging.INFO) logging.basicConfig(level=logging_level, force=True) - if args.deterministic is not None: - replay_log = DeterministicLog(log_dir=args.deterministic) + detlog = DeterministicLog(args.deterministic_log) + if args.deterministic_log and detlog.base_directory is None: + raise NotImplementedError(f'Deterministic log {args.deterministic_log} specified but ' + 'Focaccia built without deterministic log support') try: - gdb_server = GDBServerStateIterator(args.remote, replay_log.events()) + gdb_server = GDBServerStateIterator(args.remote, detlog) except Exception as e: raise Exception(f'Unable to perform basic GDB setup: {e}') diff --git a/src/focaccia/qemu/deterministic.py b/src/focaccia/qemu/deterministic.py new file mode 100644 index 0000000..d2d314e --- /dev/null +++ b/src/focaccia/qemu/deterministic.py @@ -0,0 +1,9 @@ +from focaccia.qemu.x86 import emulated_system_calls as x86_emu_syscalls + +emulated_system_calls = { + 'x86_64': x86_emu_syscalls, + 'aarch64': { }, + 'aarch64l': { }, + 'aarch64b': { } +} + diff --git a/src/focaccia/qemu/syscall.py b/src/focaccia/qemu/syscall.py new file mode 100644 index 0000000..956f5c9 --- /dev/null +++ b/src/focaccia/qemu/syscall.py @@ -0,0 +1,14 @@ +class SyscallInfo: + def __init__(self, + name: str, + patchup_registers: list[str] | None = None, + patchup_address_registers: list[str] | None = None): + """Describes a syscall by its name and outputs. + + :param name: The name of a system call. + :param patchup_registers: Registers that must be replaced with deterministic values. + """ + self.name = name + self.patchup_registers = patchup_registers + self.patchup_address_registers = patchup_address_registers + diff --git a/src/focaccia/qemu/x86.py b/src/focaccia/qemu/x86.py new file mode 100644 index 0000000..347bbc4 --- /dev/null +++ b/src/focaccia/qemu/x86.py @@ -0,0 +1,10 @@ +from focaccia.qemu.syscall import SyscallInfo + +# Incomplete, only the most common ones +emulated_system_calls = { + 34: SyscallInfo('pause', []), + 39: SyscallInfo('getpid', []), + 102: SyscallInfo('getuid', []), + 318: SyscallInfo('getrandom', patchup_address_registers=['rdi']) +} + diff --git a/src/focaccia/snapshot.py b/src/focaccia/snapshot.py index f40ac5a..93241c1 100644 --- a/src/focaccia/snapshot.py +++ b/src/focaccia/snapshot.py @@ -161,7 +161,7 @@ class ProgramState(ReadableProgramState): return (regval & acc.mask) >> acc.start - def set_register(self, reg: str, value: int): + def write_register(self, reg: str, value: int): """Assign a value to a register. :raise RegisterAccessError: If `reg` is not a register name. diff --git a/src/focaccia/tools/validate_qemu.py b/src/focaccia/tools/validate_qemu.py index 3e0db89..4b5160f 100755 --- a/src/focaccia/tools/validate_qemu.py +++ b/src/focaccia/tools/validate_qemu.py @@ -29,6 +29,7 @@ from focaccia.arch import supported_architectures from focaccia.qemu.validation_server import start_validation_server verbosity = { + 'debug': ErrorTypes.INFO, 'info': ErrorTypes.INFO, 'warning': ErrorTypes.POSSIBLE, 'error': ErrorTypes.CONFIRMED, @@ -83,7 +84,7 @@ memory, and stepping forward by single instructions. type=str, default='gdb', help='GDB binary to invoke.') - prog.add_argument('--deterministic', default=None, + prog.add_argument('--deterministic-log', default=None, help='The directory containing rr traces') return prog |