about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/focaccia/arch/arch.py 24
-rw-r--r-- src/focaccia/arch/x86.py 22
-rw-r--r-- src/focaccia/deterministic.py 2
-rw-r--r-- src/focaccia/qemu/_qemu_tool.py 74
-rwxr-xr-x src/focaccia/tools/validate_qemu.py 4
5 files changed, 119 insertions, 7 deletions
diff --git a/src/focaccia/arch/arch.py b/src/focaccia/arch/arch.py
index c220a3b..2652159 100644
--- a/src/focaccia/arch/arch.py
+++ b/src/focaccia/arch/arch.py
@@ -1,6 +1,17 @@
 from typing import Literal
 from collections.abc import Callable
 
+class SyscallInfo:
+    def __init__(self, name: str, outputs: list[tuple[str, str, str]]):
+        """Describes a syscall by its name and outputs.
+
+        An output is a (pointer, length, type) triple: the register name holding the pointer,
+        the length in bytes--either as a number or as another register name--and
+        the type of the output.
+        """
+        self.name = name
+        self.outputs = outputs
+
 class RegisterAccessor:
     def __init__(self, regname: str, start_bit: int, end_bit: int):
         """An accessor that describes a range of bits.
@@ -103,6 +114,19 @@ class Arch():
         """
         return False
 
+    def get_em_syscalls(self) -> dict[int, SyscallInfo]:
+        """Returns the architecture-specific syscalls, keyed by syscall number, that Focaccia needs to purely emulate."""
+        raise NotImplementedError("Architecture must implement get_em_syscalls")
+
+    def get_pasthru_syscalls(self) -> dict[int, str]:
+        """Returns an architecture-specific set of syscalls that Focaccia needs to pass through and
+        then warn about mismatching values. Examples are memory and lock related syscalls."""
+        raise NotImplementedError("Architecture must implement get_pasthru_syscalls")
+
+    def get_syscall_reg(self) -> str:
+        """Returns the register name that contains the syscall number."""
+        raise NotImplementedError("Architecture must implement get_syscall_reg")
+
     def is_instr_syscall(self, instr: str) -> bool:
         return False
 
diff --git a/src/focaccia/arch/x86.py b/src/focaccia/arch/x86.py
index a5d29f5..b33c6bf 100644
--- a/src/focaccia/arch/x86.py
+++ b/src/focaccia/arch/x86.py
@@ -1,6 +1,6 @@
 """Architecture-specific configuration."""
 
-from .arch import Arch, RegisterDescription as _Reg
+from .arch import Arch, RegisterDescription as _Reg, SyscallInfo as _Sc
 
 archname = 'x86_64'
 
@@ -183,6 +183,18 @@ def compose_rflags(rflags: dict[str, int]) -> int:
         (0x00200000 if rflags.get('ID', 0)  else 0)
     )
 
+# Incomplete, only the most common ones
+emulatedSyscalls = {
+    34: _Sc('pause', []),
+    39: _Sc('getpid', []),
+    102: _Sc('getuid', []),
+    318: _Sc('getrandom', [('rdi', 'rsi', 'char')]),
+}
+
+# Focaccia will do scheduling (and locking ???)
+passthruSyscalls = {
+}
+
 class ArchX86(Arch):
     def __init__(self):
         super().__init__(archname, registers, 64)
@@ -215,3 +227,11 @@ class ArchX86(Arch):
             return True
         return False
 
+    def get_em_syscalls(self) -> dict[int, _Sc]:  # syscall number -> SyscallInfo
+        return emulatedSyscalls
+
+    def get_pasthru_syscalls(self) -> dict[int, str]:
+        return passthruSyscalls
+
+    def get_syscall_reg(self) -> str:
+        return 'rax'
diff --git a/src/focaccia/deterministic.py b/src/focaccia/deterministic.py
index 4ebcf9e..2a15430 100644
--- a/src/focaccia/deterministic.py
+++ b/src/focaccia/deterministic.py
@@ -57,7 +57,7 @@ class Event:
 
         repr_str = f'Thread {hex(self.tid)} executed event {self.event_type} at {hex(self.pc)}\n'
         repr_str += f'Register set:\n{reg_repr}'
-        
+
         if len(self.mem_writes):
             repr_str += f'\nMemory writes:\n{mem_write_repr}'
 
diff --git a/src/focaccia/qemu/_qemu_tool.py b/src/focaccia/qemu/_qemu_tool.py
index 93849bd..73857a4 100644
--- a/src/focaccia/qemu/_qemu_tool.py
+++ b/src/focaccia/qemu/_qemu_tool.py
@@ -19,6 +19,7 @@ from focaccia.snapshot import ProgramState, ReadableProgramState, \
 from focaccia.symbolic import SymbolicTransform, eval_symbol, ExprMem
 from focaccia.trace import Trace, TraceEnvironment
 from focaccia.utils import print_result
+from focaccia.deterministic import DeterministicLog, Event
 
 from focaccia.tools.validate_qemu import make_argparser, verbosity
 
@@ -121,11 +122,13 @@ class GDBProgramState(ReadableProgramState):
             raise MemoryAccessError(addr, size, str(err))
 
 class GDBServerStateIterator:
-    def __init__(self, remote: str):
+    def __init__(self, remote: str, replay_log: list[Event] | None):
         gdb.execute('set pagination 0')
         gdb.execute('set sysroot')
         gdb.execute('set python print-stack full') # enable complete Python tracebacks
         gdb.execute(f'target remote {remote}')
+        self._replay_log = replay_log
+        self._replay_idx = 0
         self._process = gdb.selected_inferior()
         self._first_next = True
 
@@ -142,6 +145,55 @@ class GDBServerStateIterator:
         self.arch = supported_architectures[archname]
         self.binary = self._process.progspace.filename
 
+    def _handle_sync_point(self, call: int, addr: int, length: int, arch: Arch):
+        """Replay the syscall at `addr` from the log if it is emulated; return the PC to
+        resume from (`addr` itself when `call` is not an emulated syscall)."""
+        def _search_next_event(addr: int, idx: int) -> tuple[int, Event | None]:
+            if self._replay_log is None:
+                return idx, None
+            for i in range(idx, len(self._replay_log)):
+                event = self._replay_log[i]
+                if event.pc == addr:
+                    return i, event
+            return idx, None
+
+        _new_pc = addr + length
+        print(f'Handling syscall at {hex(_new_pc)} with call number {call}')
+        if int(call) in arch.get_em_syscalls().keys():
+            i, e = _search_next_event(_new_pc, self._replay_idx)
+            if e is None:
+                raise Exception('No matching event found in deterministic log '
+                                f'for syscall at {hex(_new_pc)}')
+
+            # NOTE(review): presumably the event after the match holds the post-syscall state
+            e = self._replay_log[i+1]
+            print(f'Adjusting w/ Event: {e}')
+            gdb.execute(f'set $pc = {hex(_new_pc)}')
+            self._replay_idx = i+2
+
+            reg_name = arch.get_syscall_reg()
+            gdb.execute(f'set $rax = {hex(e.registers.get(reg_name, 0))}')
+
+            assert(len(arch.get_em_syscalls()[int(call)].outputs) == len(e.mem_writes))
+
+            for _reg, _size, _type in arch.get_em_syscalls()[int(call)].outputs:
+                if arch.to_regname(_size) is not None:
+                    _size = e.registers[_size]
+                else:
+                    _size = int(_size)
+
+                _addr_rr = e.registers[_reg]
+                _size_rr = e.mem_writes[_addr_rr]
+
+                assert (_size == _size_rr), f'{_size} != {_size_rr}'
+                _addr = gdb.selected_frame().read_register(_reg)
+                # TODO(review): `_src` is sent to GDB literally and both sides use the same `_addr`
+                gdb.execute(f'set {{{_type}[_src]}}{_addr} = *({_type}[{_size}] *){_addr}')
+
+            return _new_pc
+
+        return addr
+
     def __iter__(self):
         return self
 
@@ -160,6 +212,11 @@ class GDBServerStateIterator:
             if not self._process.is_valid() or len(self._process.threads()) == 0:
                 raise StopIteration
             new_pc = gdb.selected_frame().read_register('pc')
+            if self._replay_log is not None:
+                asm = gdb.selected_frame().architecture().disassemble(new_pc, count=1)[0]
+                if 'syscall' in asm['asm']:
+                    call_reg = self.arch.get_syscall_reg()
+                    new_pc = self._handle_sync_point(gdb.selected_frame().read_register(call_reg), asm['addr'], asm['length'], self.arch)
 
         return GDBProgramState(self._process, gdb.selected_frame(), self.arch)
 
@@ -341,17 +398,26 @@ def collect_conc_trace(gdb: GDBServerStateIterator, \
     # Note: this may occur when symbolic traces were gathered with a stop address
     if symb_i >= len(strace):
         warn(f'QEMU executed more states than native execution: {symb_i} vs {len(strace)-1}')
-        
+
     return states, matched_transforms
 
 def main():
     args = make_argparser().parse_args()
-    
+
     logging_level = getattr(logging, args.error_level.upper(), logging.INFO)
     logging.basicConfig(level=logging_level, force=True)
 
+    if args.deterministic is not None:
+        replay_log = DeterministicLog(log_dir=args.deterministic)
+
+    if args.deterministic is not None:
+        replay_log = DeterministicLog(log_dir=args.deterministic)
+
+    print(f'Events: {list(replay_log.raw_events())}')
+    print(f'Maps: {list(replay_log.raw_mmaps())}')
+    exit(0)
     try:
-        gdb_server = GDBServerStateIterator(args.remote)
+        gdb_server = GDBServerStateIterator(args.remote, replay_log.events())
     except Exception as e:
         raise Exception(f'Unable to perform basic GDB setup: {e}')
 
diff --git a/src/focaccia/tools/validate_qemu.py b/src/focaccia/tools/validate_qemu.py
index 48b3f1c..3e0db89 100755
--- a/src/focaccia/tools/validate_qemu.py
+++ b/src/focaccia/tools/validate_qemu.py
@@ -79,10 +79,12 @@ memory, and stepping forward by single instructions.
     prog.add_argument('--remote',
                       type=str,
                       help='The hostname:port pair at which to find a QEMU GDB server.')
-    prog.add_argument('--gdb', 
+    prog.add_argument('--gdb',
                       type=str,
                       default='gdb',
                       help='GDB binary to invoke.')
+    prog.add_argument('--deterministic', default=None,
+                      help='The directory containing rr traces')
     return prog
 
 def quoted(s: str) -> str: