about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/focaccia/arch/x86.py 13
-rw-r--r-- src/focaccia/compare.py 2
-rw-r--r-- src/focaccia/deterministic.py 17
-rw-r--r-- src/focaccia/native/tracer.py 4
-rw-r--r-- src/focaccia/qemu/_qemu_tool.py 208
-rw-r--r-- src/focaccia/qemu/deterministic.py 9
-rw-r--r-- src/focaccia/qemu/syscall.py 14
-rw-r--r-- src/focaccia/qemu/x86.py 10
-rw-r--r-- src/focaccia/snapshot.py 2
-rwxr-xr-x src/focaccia/tools/validate_qemu.py 3
10 files changed, 182 insertions, 100 deletions
diff --git a/src/focaccia/arch/x86.py b/src/focaccia/arch/x86.py
index b33c6bf..4609f4c 100644
--- a/src/focaccia/arch/x86.py
+++ b/src/focaccia/arch/x86.py
@@ -183,18 +183,6 @@ def compose_rflags(rflags: dict[str, int]) -> int:
         (0x00200000 if rflags.get('ID', 0)  else 0)
     )
 
-# Incomplete, only the most common ones
-emulatedSyscalls = {
-    34: _Sc('pause', []),
-    39: _Sc('getpid', []),
-    102: _Sc('getuid', []),
-    318: _Sc('getrandom', [('rdi', 'rsi', 'char')]),
-}
-
-# Focaccia will do scheduling (and locking ???)
-passthruSyscalls = {
-}
-
 class ArchX86(Arch):
     def __init__(self):
         super().__init__(archname, registers, 64)
@@ -235,3 +223,4 @@ class ArchX86(Arch):
 
     def get_syscall_reg(self) -> str:
         return 'rax'
+
diff --git a/src/focaccia/compare.py b/src/focaccia/compare.py
index 4fea451..3aa5ab2 100644
--- a/src/focaccia/compare.py
+++ b/src/focaccia/compare.py
@@ -34,7 +34,7 @@ def _calc_transformation(previous: ProgramState, current: ProgramState):
         try:
             prev_val = previous.read_register(reg)
             cur_val = current.read_register(reg)
-            transformation.set_register(reg, cur_val - prev_val)
+            transformation.write_register(reg, cur_val - prev_val)
         except RegisterAccessError:
             # Register is not set in either state
             pass
diff --git a/src/focaccia/deterministic.py b/src/focaccia/deterministic.py
index 91afd4e..6d76457 100644
--- a/src/focaccia/deterministic.py
+++ b/src/focaccia/deterministic.py
@@ -1,6 +1,7 @@
 from .arch import Arch
 from .snapshot import ReadableProgramState
 
+from reprlib import repr as alt_repr
 from typing import Callable
 
 class MemoryWriteHole:
@@ -56,7 +57,7 @@ class Event:
 
         mem_write_repr = ''
         for mem_write in self.mem_writes:
-            mem_write_repr += f'{mem_write}\n'
+            mem_write_repr += f'{alt_repr(mem_write)}\n'
 
         repr_str = f'Thread {hex(self.tid)} executed event {self.event_type} at {hex(self.pc)}\n'
         repr_str += f'Register set:\n{reg_repr}'
@@ -306,6 +307,13 @@ finally:
                 return None
             return self._events[self._idx]
 
+        def next_event(self) -> Event | None:
+            if self._idx is None:
+                raise ValueError('Attempted to get next event without synchronizing')
+            if self._idx + 1 >= len(self._events):
+                return None
+            return self._events[self._idx+1]
+
         def update(self, target: ReadableProgramState) -> Event | None:
             # Quick check
             candidates = self._pc_to_event.get(target.read_pc(), [])
@@ -321,13 +329,14 @@ finally:
                         self._in_event = True
                         return self.current_event()
 
-            return self.next()
+            return self.update_to_next()
 
-        def next(self) -> Event | None:
+        def update_to_next(self, count: int = 1) -> Event | None:
             if self._idx is None:
                 raise ValueError('Attempted to get next event without synchronizing')
 
-            self._idx += 1
+            self._in_event = True
+            self._idx += count
             return self.current_event()
 
         def __bool__(self) -> bool:
diff --git a/src/focaccia/native/tracer.py b/src/focaccia/native/tracer.py
index b369b22..4376f41 100644
--- a/src/focaccia/native/tracer.py
+++ b/src/focaccia/native/tracer.py
@@ -217,12 +217,12 @@ class SymbolicTracer:
                 self.target.run()
 
             debug(f'Completed handling event: {current_event}')
-            self.nondet_events.next()
+            self.nondet_events.update_to_next()
 
     def is_stepping_instr(self, instruction: Instruction) -> bool:
         if self.nondet_events.current_event():
             debug('Current instruction matches next event; stepping through it')
-            self.nondet_events.next()
+            self.nondet_events.update_to_next()
             return True
         else:
             if self.target.arch.is_instr_syscall(str(instruction)):
diff --git a/src/focaccia/qemu/_qemu_tool.py b/src/focaccia/qemu/_qemu_tool.py
index ccdf2fb..75b142e 100644
--- a/src/focaccia/qemu/_qemu_tool.py
+++ b/src/focaccia/qemu/_qemu_tool.py
@@ -19,7 +19,8 @@ from focaccia.snapshot import ProgramState, ReadableProgramState, \
 from focaccia.symbolic import SymbolicTransform, eval_symbol, ExprMem
 from focaccia.trace import Trace, TraceEnvironment
 from focaccia.utils import print_result
-from focaccia.deterministic import DeterministicLog, Event
+from focaccia.deterministic import DeterministicLog, DeterministicEventIterator, Event, SyscallEvent
+from focaccia.qemu.deterministic import emulated_system_calls
 
 from focaccia.tools.validate_qemu import make_argparser, verbosity
 
@@ -37,7 +38,13 @@ qemu_crash = {
         'snap': None,
 }
 
-class GDBProgramState(ReadableProgramState):
+def match_event(event: Event, target: ReadableProgramState) -> bool:
+    # Match just on PC
+    if event.pc == target.read_pc():
+        return True
+    return False
+
+class GDBProgramState(ProgramState):
     from focaccia.arch import aarch64, x86
 
     flag_register_names = {
@@ -122,13 +129,12 @@ class GDBProgramState(ReadableProgramState):
             raise MemoryAccessError(addr, size, str(err))
 
 class GDBServerStateIterator:
-    def __init__(self, remote: str, deterministic_log: list[Event] | None):
+    def __init__(self, remote: str, deterministic_log: DeterministicLog):
         gdb.execute('set pagination 0')
         gdb.execute('set sysroot')
         gdb.execute('set python print-stack full') # enable complete Python tracebacks
         gdb.execute(f'target remote {remote}')
         self._deterministic_log = deterministic_log
-        self._deterministic_idx = 0
         self._process = gdb.selected_inferior()
         self._first_next = True
 
@@ -138,96 +144,129 @@ class GDBServerStateIterator:
         archname = split[1] if len(split) > 1 else split[0]
         archname = archname.replace('-', '_')
         if archname not in supported_architectures:
-            print(f'Error: Current platform ({archname}) is not'
-                  f' supported by Focaccia. Exiting.')
-            exit(1)
+            raise NotImplementedError(f'Platform {archname} is not supported by Focaccia')
 
         self.arch = supported_architectures[archname]
         self.binary = self._process.progspace.filename
 
-    def _handle_sync_point(self, call: int, addr: int, length: int, arch: Arch):
-        def _search_next_event(addr: int, idx: int) -> Event | None:
-            if self._deterministic_log is None:
-                return idx, None
-            for i in range(idx, len(self._deterministic_log)):
-                event = self._deterministic_log[i]
-                if event.pc == addr:
-                    return i, event
-            return idx, None
-
-        _new_pc = addr + length
-        print(f'Handling syscall at {hex(_new_pc)} with call number {call}')
-        if int(call) in arch.get_em_syscalls().keys():
-
-            #print(f'Events: {self._deterministic_log[self._deterministic_idx:]}')
-            i, e = _search_next_event(_new_pc, self._deterministic_idx)
-            if e is None:
-                raise Exception(f'No matching event found in deterministic log \
-                                for syscall at {hex(_new_pc)}')
-
-            e = self._deterministic_log[i+1]
-            print(f'Adjusting w/ Event: {e}')
-            gdb.execute(f'set $pc = {hex(_new_pc)}')
-            self._deterministic_idx = i+2
-
-            reg_name = arch.get_syscall_reg()
-            gdb.execute(f'set $rax = {hex(e.registers.get("{reg_name}", 0))}')
-
-            assert(len(arch.get_em_syscalls()[int(call)].outputs) == len(e.mem_writes))
-
-            w_idx = 0
-            for _reg, _size, _type in arch.get_em_syscalls()[int(call)].outputs:
-                if arch.to_regname(_size) is not None:
-                    _size = e.registers[_size]
-                else:
-                    _size = int(_size)
-
-                _addr_rr = e.registers[_reg]
-                _w_rr = e.mem_writes[w_idx]
-                w_idx += 1
-
-                assert (_size == _w_rr.size), f'{_size} != {_w_rr.size}'
-                _addr = gdb.selected_frame().read_register(_reg)
-                cmd = f'set {{char[{_size}]}}{hex(_addr)} = 0x{_w_rr.data.hex()}'
-                # print(f'GDB: {cmd}')
-                gdb.execute(cmd)
-
-            return _new_pc
-
-        return addr
+        self._deterministic_events = DeterministicEventIterator(self._deterministic_log, match_event)
+
+        # Filter non-deterministic events for event after start
+        self._deterministic_events.update(self.current_state())
+        self._deterministic_events.update_to_next()
+
+    def current_state(self) -> ReadableProgramState:
+        return GDBProgramState(self._process, gdb.selected_frame(), self.arch)
+
+    def _handle_syscall(self) -> GDBProgramState:
+        cur_event = self._deterministic_events.current_event()
+        call = cur_event.registers.get(self.arch.get_syscall_reg())
+
+        post_event = self._deterministic_events.update_to_next()
+        syscall = emulated_system_calls[self.arch.archname].get(call, None)
+        debug(f'Handling event:\n{cur_event}')
+        if syscall is not None:
+            info(f'Replaying system call number {hex(call)}')
+
+            self.skip(post_event.pc)
+            next_state = GDBProgramState(self._process, gdb.selected_frame(), self.arch)
+
+            patchup_regs = [self.arch.get_syscall_reg(), *(syscall.patchup_registers or [])]
+            for reg in patchup_regs:
+                next_state.write_register(reg, post_event.registers.get(reg))
+
+            for mem in post_event.mem_writes:
+                # TODO: handle holes
+                # TODO: address mapping
+                addr, data = mem.address, mem.data
+                next_state.write_memory(addr, data)
+
+            return next_state
+
+        info(f'System call number {hex(call)} not replayed')
+        self._step()
+        if self._is_exited():
+            raise StopIteration
+        return GDBProgramState(self._process, gdb.selected_frame(), self.arch)
+
+    def _handle_event(self) -> GDBProgramState:
+        current_event = self._deterministic_events.current_event()
+        if not current_event:
+            return self.current_state()
+
+        if isinstance(current_event, SyscallEvent):
+            return self._handle_syscall()
+
+        warn(f'Event handling for events of type {current_event.event_type} not implemented')
+        return self.current_state()
+
+    def _is_exited(self) -> bool:
+        return not self._process.is_valid() or len(self._process.threads()) == 0
 
     def __iter__(self):
         return self
 
-    def __next__(self):
+    def __next__(self) -> ReadableProgramState:
         # The first call to __next__ should yield the first program state,
         # i.e. before stepping the first time
         if self._first_next:
             self._first_next = False
             return GDBProgramState(self._process, gdb.selected_frame(), self.arch)
 
+        if match_event(self._deterministic_events.current_event(), self.current_state()):
+            state = self._handle_event()
+            if self._is_exited():
+                raise StopIteration
+            self._deterministic_events.update_to_next()
+            return state
+
         # Step
         pc = gdb.selected_frame().read_register('pc')
         new_pc = pc
         while pc == new_pc:  # Skip instruction chains from REP STOS etc.
-            gdb.execute('si', to_string=True)
-            if not self._process.is_valid() or len(self._process.threads()) == 0:
+            self._step()
+            if self._is_exited():
                 raise StopIteration
             new_pc = gdb.selected_frame().read_register('pc')
-            if self._deterministic_log is not None:
-                asm = gdb.selected_frame().architecture().disassemble(new_pc, count=1)[0]
-                if 'syscall' in asm['asm']:
-                    call_reg = self.arch.get_syscall_reg()
-                    new_pc = self._handle_sync_point(gdb.selected_frame().read_register(call_reg), asm['addr'], asm['length'], self.arch)
 
-        return GDBProgramState(self._process, gdb.selected_frame(), self.arch)
+        return self.current_state()
+
+    def run_until(self, addr: int) -> ReadableProgramState:
+        events_handled = 0
+        event = self._deterministic_events.current_event()
+        while event:
+            state = self._run_until_any([addr, event.pc])
+            if state.read_pc() == addr:
+                # Check if we started at the very _start
+                self._first_next = events_handled == 0
+                return state
+
+            self._handle_event()
+
+            event = self._deterministic_events.update_to_next()
+            events_handled += 1
+        return self._run_until_any([addr])
+
+    def _run_until_any(self, addresses: list[int]) -> ReadableProgramState:
+        info(f'Executing until {[hex(x) for x in addresses]}')
+
+        breakpoints = []
+        for addr in addresses:
+            breakpoints.append(gdb.Breakpoint(f'*{addr:#x}'))
 
-    def run_until(self, addr: int):
-        breakpoint = gdb.Breakpoint(f'*{addr:#x}')
         gdb.execute('continue')
-        breakpoint.delete()
+
+        for bp in breakpoints:
+            bp.delete()
+
         return GDBProgramState(self._process, gdb.selected_frame(), self.arch)
 
+    def skip(self, new_pc: int):
+        gdb.execute(f'set $pc = {hex(new_pc)}')
+
+    def _step(self):
+        gdb.execute('si', to_string=True)
+
 def record_minimal_snapshot(prev_state: ReadableProgramState,
                             cur_state: ReadableProgramState,
                             prev_transform: SymbolicTransform,
@@ -270,7 +309,7 @@ def record_minimal_snapshot(prev_state: ReadableProgramState,
         for regname in regs:
             try:
                 regval = cur_state.read_register(regname)
-                out_state.set_register(regname, regval)
+                out_state.write_register(regname, regval)
             except RegisterAccessError:
                 pass
         for mem in mems:
@@ -283,7 +322,7 @@ def record_minimal_snapshot(prev_state: ReadableProgramState,
                 pass
 
     state = ProgramState(cur_transform.arch)
-    state.set_register('PC', cur_transform.addr)
+    state.write_register('PC', cur_transform.addr)
 
     set_values(prev_transform.changed_regs.keys(),
                get_written_addresses(prev_transform),
@@ -334,24 +373,33 @@ def collect_conc_trace(gdb: GDBServerStateIterator, \
     cur_state = next(state_iter)
     symb_i = 0
 
+    if logger.isEnabledFor(logging.DEBUG):
+        debug('Tracing program with the following non-deterministic events:')
+        for event in gdb._deterministic_events.events():
+            debug(event)
+
     # Skip to start
+    pc = cur_state.read_register('pc')
+    start_addr = start_addr if start_addr else pc
     try:
-        pc = cur_state.read_register('pc')
-        if start_addr and pc != start_addr:
-            info(f'Tracing QEMU from starting address: {hex(start_addr)}')
+        if pc != start_addr:
+            info(f'Executing until starting address {hex(start_addr)}')
             cur_state = state_iter.run_until(start_addr)
     except Exception as e:
-        if start_addr:
+        if pc != start_addr:
             raise Exception(f'Unable to reach start address {hex(start_addr)}: {e}')
         raise Exception(f'Unable to trace: {e}')
 
     # An online trace matching algorithm.
+    info(f'Tracing QEMU between {hex(start_addr)}:{hex(stop_addr) if stop_addr else "end"}')
     while True:
         try:
             pc = cur_state.read_register('pc')
+            if stop_addr and pc == stop_addr:
+                break
 
             while pc != strace[symb_i].addr:
-                info(f'PC {hex(pc)} does not match next symbolic reference {hex(strace[symb_i].addr)}')
+                warn(f'PC {hex(pc)} does not match next symbolic reference {hex(strace[symb_i].addr)}')
 
                 next_i = find_index(strace[symb_i+1:], pc, lambda t: t.addr)
 
@@ -409,11 +457,13 @@ def main():
     logging_level = getattr(logging, args.error_level.upper(), logging.INFO)
     logging.basicConfig(level=logging_level, force=True)
 
-    if args.deterministic is not None:
-        replay_log = DeterministicLog(log_dir=args.deterministic)
+    detlog = DeterministicLog(args.deterministic_log)
+    if args.deterministic_log and detlog.base_directory is None:
+        raise NotImplementedError(f'Deterministic log {args.deterministic_log} specified but '
+                                   'Focaccia built without deterministic log support')
 
     try:
-        gdb_server = GDBServerStateIterator(args.remote, replay_log.events())
+        gdb_server = GDBServerStateIterator(args.remote, detlog)
     except Exception as e:
         raise Exception(f'Unable to perform basic GDB setup: {e}')
 
diff --git a/src/focaccia/qemu/deterministic.py b/src/focaccia/qemu/deterministic.py
new file mode 100644
index 0000000..d2d314e
--- /dev/null
+++ b/src/focaccia/qemu/deterministic.py
@@ -0,0 +1,9 @@
+from focaccia.qemu.x86 import emulated_system_calls as x86_emu_syscalls
+
+emulated_system_calls = {
+    'x86_64': x86_emu_syscalls,
+    'aarch64': { },
+    'aarch64l': { },
+    'aarch64b': { }
+}
+
diff --git a/src/focaccia/qemu/syscall.py b/src/focaccia/qemu/syscall.py
new file mode 100644
index 0000000..956f5c9
--- /dev/null
+++ b/src/focaccia/qemu/syscall.py
@@ -0,0 +1,14 @@
+class SyscallInfo:
+    def __init__(self, 
+                 name: str,
+                 patchup_registers: list[str] | None = None,
+                 patchup_address_registers: list[str] | None = None):
+        """Describes a syscall by its name and outputs.
+
+        :param name: The name of a system call.
+        :param patchup_registers: Registers that must be replaced with deterministic values.
+        """
+        self.name = name
+        self.patchup_registers = patchup_registers
+        self.patchup_address_registers = patchup_address_registers
+
diff --git a/src/focaccia/qemu/x86.py b/src/focaccia/qemu/x86.py
new file mode 100644
index 0000000..347bbc4
--- /dev/null
+++ b/src/focaccia/qemu/x86.py
@@ -0,0 +1,10 @@
+from focaccia.qemu.syscall import SyscallInfo
+
+# Incomplete, only the most common ones
+emulated_system_calls = {
+    34:  SyscallInfo('pause', []),
+    39:  SyscallInfo('getpid', []),
+    102: SyscallInfo('getuid', []),
+    318: SyscallInfo('getrandom', patchup_address_registers=['rdi'])
+}
+
diff --git a/src/focaccia/snapshot.py b/src/focaccia/snapshot.py
index f40ac5a..93241c1 100644
--- a/src/focaccia/snapshot.py
+++ b/src/focaccia/snapshot.py
@@ -161,7 +161,7 @@ class ProgramState(ReadableProgramState):
 
         return (regval & acc.mask) >> acc.start
 
-    def set_register(self, reg: str, value: int):
+    def write_register(self, reg: str, value: int):
         """Assign a value to a register.
 
         :raise RegisterAccessError: If `reg` is not a register name.
diff --git a/src/focaccia/tools/validate_qemu.py b/src/focaccia/tools/validate_qemu.py
index 3e0db89..4b5160f 100755
--- a/src/focaccia/tools/validate_qemu.py
+++ b/src/focaccia/tools/validate_qemu.py
@@ -29,6 +29,7 @@ from focaccia.arch import supported_architectures
 from focaccia.qemu.validation_server import start_validation_server
 
 verbosity = {
+    'debug':   ErrorTypes.INFO,
     'info':    ErrorTypes.INFO,
     'warning': ErrorTypes.POSSIBLE,
     'error':   ErrorTypes.CONFIRMED,
@@ -83,7 +84,7 @@ memory, and stepping forward by single instructions.
                       type=str,
                       default='gdb',
                       help='GDB binary to invoke.')
-    prog.add_argument('--deterministic', default=None,
+    prog.add_argument('--deterministic-log', default=None,
                       help='The directory containing rr traces')
     return prog