about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--reproducers/issue-508.c7
-rw-r--r--src/focaccia/arch/arch.py24
-rw-r--r--src/focaccia/arch/x86.py22
-rw-r--r--src/focaccia/deterministic.py4
-rw-r--r--src/focaccia/tools/_qemu_tool.py74
-rwxr-xr-xsrc/focaccia/tools/validate_qemu.py4
6 files changed, 126 insertions, 9 deletions
diff --git a/reproducers/issue-508.c b/reproducers/issue-508.c
index e143183..c8fd251 100644
--- a/reproducers/issue-508.c
+++ b/reproducers/issue-508.c
@@ -1,13 +1,18 @@
 #include <stdio.h>
+#include <stdlib.h>
+#include <sys/random.h>
 
 int main() {
   int mem = 0x12345678;
+  int buf = 0;  /* receives random bytes; later loaded into edi as the cmpxchg source */
+  getrandom(&buf, sizeof(buf), 0);  /* NOTE(review): return value is not checked */
   register long rax asm("rax") = 0x1234567812345678;
-  register int edi asm("edi") = 0x77777777;
+  register int edi asm("edi") = buf;
   asm("cmpxchg %[edi],%[mem]"
       : [ mem ] "+m"(mem), [ rax ] "+r"(rax)
       : [ edi ] "r"(edi));
   long rax2 = rax;
   printf("rax2 = %lx\n", rax2);
+  printf("rand= %d\n", buf);
 }
 
diff --git a/src/focaccia/arch/arch.py b/src/focaccia/arch/arch.py
index c220a3b..2652159 100644
--- a/src/focaccia/arch/arch.py
+++ b/src/focaccia/arch/arch.py
@@ -1,6 +1,17 @@
 from typing import Literal
 from collections.abc import Callable
 
+class SyscallInfo:
+    def __init__(self, name: str, outputs: list[tuple[str, str, str]]):
+        """Describe a syscall by its name and its outputs.
+
+        Each output is a triple: the name of the register holding the pointer,
+        the length in bytes (given as a number or as another register name),
+        and the type of the output.
+        """
+        self.name = name
+        self.outputs = outputs
+
 class RegisterAccessor:
     def __init__(self, regname: str, start_bit: int, end_bit: int):
         """An accessor that describes a range of bits.
@@ -103,6 +114,19 @@ class Arch():
         """
         return False
 
+    def get_em_syscalls(self) -> dict[int, 'SyscallInfo']:
+        """Returns the architecture-specific syscalls (number -> SyscallInfo) that Focaccia needs to purely emulate."""
+        raise NotImplementedError("Architecture must implement get_em_syscalls")
+
+    def get_pasthru_syscalls(self) -> dict[int, str]:
+        """Returns an architecture-specific set of syscalls that Focaccia needs to pass through and
+        then warn about mismatching values. Examples are memory- and lock-related syscalls."""
+        raise NotImplementedError("Architecture must implement get_pasthru_syscalls")
+
+    def get_syscall_reg(self) -> str:
+        """Returns the name of the register that contains the syscall number."""
+        raise NotImplementedError("Architecture must implement get_syscall_reg")
+
     def is_instr_syscall(self, instr: str) -> bool:
         return False
 
diff --git a/src/focaccia/arch/x86.py b/src/focaccia/arch/x86.py
index a5d29f5..b33c6bf 100644
--- a/src/focaccia/arch/x86.py
+++ b/src/focaccia/arch/x86.py
@@ -1,6 +1,6 @@
 """Architecture-specific configuration."""
 
-from .arch import Arch, RegisterDescription as _Reg
+from .arch import Arch, RegisterDescription as _Reg, SyscallInfo as _Sc
 
 archname = 'x86_64'
 
@@ -183,6 +183,18 @@ def compose_rflags(rflags: dict[str, int]) -> int:
         (0x00200000 if rflags.get('ID', 0)  else 0)
     )
 
+# Emulated syscalls keyed by syscall number. Incomplete, only the most common ones.
+emulatedSyscalls = {
+    34: _Sc('pause', []),
+    39: _Sc('getpid', []),
+    102: _Sc('getuid', []),
+    318: _Sc('getrandom', [('rdi', 'rsi', 'char')]),  # output: pointer in rdi, length in rsi
+}
+
+# Passed-through syscalls; none yet. Focaccia will do scheduling (and locking ???)
+passthruSyscalls = {
+}
+
 class ArchX86(Arch):
     def __init__(self):
         super().__init__(archname, registers, 64)
@@ -215,3 +227,11 @@ class ArchX86(Arch):
             return True
         return False
 
+    def get_em_syscalls(self) -> dict[int, _Sc]:  # syscall number -> SyscallInfo
+        return emulatedSyscalls
+
+    def get_pasthru_syscalls(self) -> dict[int, str]:
+        return passthruSyscalls  # module-level table defined above
+
+    def get_syscall_reg(self) -> str:
+        return 'rax'  # register reported as holding the syscall number on this architecture
diff --git a/src/focaccia/deterministic.py b/src/focaccia/deterministic.py
index e7914a3..205b28e 100644
--- a/src/focaccia/deterministic.py
+++ b/src/focaccia/deterministic.py
@@ -165,7 +165,7 @@ def parse_aarch64_registers(enc_regs: bytes, order: str='little', signed: bool=F
     regs = {}
     for i in range(len(regnames)):
         regs[regnames[i]] = parse_reg()
-    
+
     return regs
 
 class MemoryWriteHole:
@@ -237,7 +237,7 @@ class Event:
 
         repr_str = f'Thread {hex(self.tid)} executed event {self.event_type} at {hex(self.pc)}\n'
         repr_str += f'Register set:\n{reg_repr}'
-        
+
         if len(self.mem_writes):
             repr_str += f'\nMemory writes:\n{mem_write_repr}'
 
diff --git a/src/focaccia/tools/_qemu_tool.py b/src/focaccia/tools/_qemu_tool.py
index 02d150b..71481d9 100644
--- a/src/focaccia/tools/_qemu_tool.py
+++ b/src/focaccia/tools/_qemu_tool.py
@@ -19,6 +19,7 @@ from focaccia.snapshot import ProgramState, ReadableProgramState, \
 from focaccia.symbolic import SymbolicTransform, eval_symbol, ExprMem
 from focaccia.trace import Trace, TraceEnvironment
 from focaccia.utils import print_result
+from focaccia.deterministic import DeterministicLog, Event
 
 from validate_qemu import make_argparser, verbosity
 
@@ -121,11 +122,13 @@ class GDBProgramState(ReadableProgramState):
             raise MemoryAccessError(addr, size, str(err))
 
 class GDBServerStateIterator:
-    def __init__(self, remote: str):
+    def __init__(self, remote: str, replay_log: list[Event] | None):
         gdb.execute('set pagination 0')
         gdb.execute('set sysroot')
         gdb.execute('set python print-stack full') # enable complete Python tracebacks
         gdb.execute(f'target remote {remote}')
+        self._replay_log = replay_log
+        self._replay_idx = 0
         self._process = gdb.selected_inferior()
         self._first_next = True
 
@@ -142,6 +145,55 @@ class GDBServerStateIterator:
         self.arch = supported_architectures[archname]
         self.binary = self._process.progspace.filename
 
+    def _handle_sync_point(self, call: int, addr: int, length: int, arch: Arch):
+        """Emulate the syscall at `addr` from the replay log and skip the instruction.
+        Returns `addr + length` if the syscall was emulated, `addr` otherwise."""
+        def _search_next_event(addr: int, idx: int) -> tuple[int, Event | None]:
+            if self._replay_log is None:
+                return idx, None
+            for i in range(idx, len(self._replay_log)):
+                event = self._replay_log[i]
+                if event.pc == addr:
+                    return i, event
+            return idx, None
+
+        _new_pc = addr + length
+        print(f'Handling syscall at {hex(_new_pc)} with call number {call}')
+        info = arch.get_em_syscalls().get(int(call))
+        if info is None:
+            return addr
+
+        i, e = _search_next_event(_new_pc, self._replay_idx)
+        if e is None:
+            raise Exception('No matching event found in deterministic log '
+                            f'for syscall at {hex(_new_pc)}')
+
+        # The state to apply is taken from the event following the matched one.
+        e = self._replay_log[i+1]
+        print(f'Adjusting w/ Event: {e}')
+        gdb.execute(f'set $pc = {hex(_new_pc)}')
+        self._replay_idx = i+2
+
+        reg_name = arch.get_syscall_reg()
+        gdb.execute(f'set $rax = {hex(e.registers.get(reg_name, 0))}')
+
+        assert(len(info.outputs) == len(e.mem_writes))
+
+        for _reg, _size, _type in info.outputs:
+            # The length is either a register name or a literal byte count.
+            if arch.to_regname(_size) is not None:
+                _size = e.registers[_size]
+            else:
+                _size = int(_size)
+            _addr_rr = e.registers[_reg]
+            _size_rr = e.mem_writes[_addr_rr]
+            assert (_size == _size_rr), f'{_size} != {_size_rr}'
+            _addr = gdb.selected_frame().read_register(_reg)
+            # TODO: '_src' reaches gdb as literal text; presumably a placeholder for the recorded bytes.
+            gdb.execute(f'set {{{_type}[_src]}}{_addr} = *({_type}[{_size}] *){_addr}')
+
+        return _new_pc
+
     def __iter__(self):
         return self
 
@@ -160,6 +212,11 @@ class GDBServerStateIterator:
             if not self._process.is_valid() or len(self._process.threads()) == 0:
                 raise StopIteration
             new_pc = gdb.selected_frame().read_register('pc')
+            if self._replay_log is not None:
+                asm = gdb.selected_frame().architecture().disassemble(new_pc, count=1)[0]
+                if 'syscall' in asm['asm']:
+                    call_reg = self.arch.get_syscall_reg()
+                    new_pc = self._handle_sync_point(gdb.selected_frame().read_register(call_reg), asm['addr'], asm['length'], self.arch)
 
         return GDBProgramState(self._process, gdb.selected_frame(), self.arch)
 
@@ -341,17 +398,26 @@ def collect_conc_trace(gdb: GDBServerStateIterator, \
     # Note: this may occur when symbolic traces were gathered with a stop address
     if symb_i >= len(strace):
         warn(f'QEMU executed more states than native execution: {symb_i} vs {len(strace)-1}')
-        
+
     return states, matched_transforms
 
 def main():
     args = make_argparser().parse_args()
-    
+
     logging_level = getattr(logging, args.error_level.upper(), logging.INFO)
     logging.basicConfig(level=logging_level, force=True)
 
+    if args.deterministic is not None:
+        replay_log = DeterministicLog(log_dir=args.deterministic)
+
+    if args.deterministic is not None:
+        replay_log = DeterministicLog(log_dir=args.deterministic)
+
+    print(f'Events: {list(replay_log.raw_events())}')
+    print(f'Maps: {list(replay_log.raw_mmaps())}')
+    exit(0)
     try:
-        gdb_server = GDBServerStateIterator(args.remote)
+        gdb_server = GDBServerStateIterator(args.remote, replay_log.events())
     except Exception as e:
         raise Exception(f'Unable to perform basic GDB setup: {e}')
 
diff --git a/src/focaccia/tools/validate_qemu.py b/src/focaccia/tools/validate_qemu.py
index e834a6d..26b7217 100755
--- a/src/focaccia/tools/validate_qemu.py
+++ b/src/focaccia/tools/validate_qemu.py
@@ -78,10 +78,12 @@ memory, and stepping forward by single instructions.
     prog.add_argument('--remote',
                       type=str,
                       help='The hostname:port pair at which to find a QEMU GDB server.')
-    prog.add_argument('--gdb', 
+    prog.add_argument('--gdb',
                       type=str,
                       default='gdb',
                       help='GDB binary to invoke.')
+    prog.add_argument('--deterministic', default=None,
+                      help='The directory containing rr traces')
     return prog
 
 def quoted(s: str) -> str: