about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Theofilos Augoustis <theofilos.augoustis@gmail.com> 2024-10-13 17:16:00 +0200
committer Theofilos Augoustis <theofilos.augoustis@gmail.com> 2024-10-13 17:16:00 +0200
commit aa946a8b14b7970c3c8f52626b82068cdf39cf94 (patch)
tree 9ccee22a66533cb4183047c3cac93563d2d8ca84
parent 4f005b2917ff83acb966495741487029ab34ab1a (diff)
download focaccia-aa946a8b14b7970c3c8f52626b82068cdf39cf94.tar.gz
focaccia-aa946a8b14b7970c3c8f52626b82068cdf39cf94.zip
Read instructions directly from program memory

Instead of loading an executable file and disassembling instructions
from there, load instructions directly from the concrete execution's
memory. This allows symbolic tracing to work for dynamically linked
executables as well as JIT-compiled code.
-rw-r--r-- focaccia/lldb_target.py 7
-rw-r--r-- focaccia/symbolic.py 44
2 files changed, 32 insertions, 19 deletions
diff --git a/focaccia/lldb_target.py b/focaccia/lldb_target.py
index 2ed0757..1f31337 100644
--- a/focaccia/lldb_target.py
+++ b/focaccia/lldb_target.py
@@ -189,6 +189,13 @@ class LLDBConcreteTarget:
         try:
             reg = self._get_register(regname)
             assert(reg.IsValid())
+            if reg.size > 8:  # reg is a vector register
+                reg.data.byte_order = lldb.eByteOrderLittle
+                val = 0
+                for ui64 in reversed(reg.data.uint64s):
+                    val <<= 64
+                    val |= ui64
+                return val
             return reg.GetValueAsUnsigned()
         except ConcreteRegisterError as err:
             flags = self.read_flags()
diff --git a/focaccia/symbolic.py b/focaccia/symbolic.py
index 6338a14..0f381a4 100644
--- a/focaccia/symbolic.py
+++ b/focaccia/symbolic.py
@@ -420,26 +420,32 @@ class SymbolicTransform:
 
         return res[:-1]  # Remove trailing newline
 
+class MemoryBinstream:
+    """A binary stream interface that reads bytes from a program state's
+    memory."""
+    def __init__(self, state: ReadableProgramState):
+        self._state = state
+
+    def __len__(self):
+        return 0xffffffff
+
+    def __getitem__(self, key: int | slice):
+        if isinstance(key, slice):
+            return self._state.read_memory(key.start, key.stop - key.start)
+        return self._state.read_memory(key, 1)
+
 class DisassemblyContext:
-    def __init__(self, binary):
+    def __init__(self, target: ReadableProgramState):
         self.loc_db = LocationDB()
 
-        # Load the binary
-        with open(binary, 'rb') as bin_file:
-            cont = ContainerELF.from_stream(bin_file, self.loc_db)
-        self.entry_point = cont.entry_point
-
         # Determine the binary's architecture
-        self.machine = Machine(cont.arch)
-        if self.machine.name not in supported_architectures:
-            raise NotImplementedError(f'[ERROR] {self.machine.name} is not'
-                                      f' supported.')
-        self.arch = supported_architectures[self.machine.name]
-        """Focaccia's description of an instruction set architecture."""
+        self.machine = make_machine(target.arch)
+        self.arch = target.arch
 
         # Create disassembly/lifting context
         assert(self.machine.dis_engine is not None)
-        self.mdis = self.machine.dis_engine(cont.bin_stream, loc_db=self.loc_db)
+        binstream = MemoryBinstream(target)
+        self.mdis = self.machine.dis_engine(binstream, loc_db=self.loc_db)
         self.mdis.follow_call = True
         self.lifter = self.machine.lifter(self.loc_db)
 
@@ -573,8 +579,8 @@ class _LLDBConcreteState(ReadableProgramState):
     target. This saves us the trouble of recording a full program state, and
     allows us instead to read values from LLDB on demand.
     """
-    def __init__(self, target: LLDBConcreteTarget, arch: Arch):
-        super().__init__(arch)
+    def __init__(self, target: LLDBConcreteTarget):
+        super().__init__(target.arch)
         self._target = target
 
     def read_register(self, reg: str) -> int:
@@ -604,14 +610,14 @@ def collect_symbolic_trace(env: TraceEnvironment,
     """
     binary = env.binary_name
 
-    ctx = DisassemblyContext(binary)
-    arch = ctx.arch
-
     # Set up concrete reference state
     target = LLDBConcreteTarget(binary, env.argv, env.envp)
     if start_addr is not None:
         target.run_until(start_addr)
-    lldb_state = _LLDBConcreteState(target, arch)
+    lldb_state = _LLDBConcreteState(target)
+
+    ctx = DisassemblyContext(lldb_state)
+    arch = ctx.arch
 
     # Trace concolically
     strace: list[SymbolicTransform] = []