17 files changed, 102 insertions, 121 deletions
diff --git a/README.md b/README.md
index 4200b8c..82918b0 100644
--- a/README.md
+++ b/README.md
@@ -38,33 +38,31 @@ The `tools/` directory contains additional utility scripts to work with focaccia
 
 The following files belong to a rough framework for the snapshot comparison engine:
 
- - `main.py`: Entry point to the tool. Handling of command line arguments, pre-processing of input logs, etc.
+ - `focaccia/snapshot.py`: Structures used to work with snapshots. The `ProgramState` class is our primary
+representation of program snapshots.
 
- - `snapshot.py`: Structures used to work with snapshots. The `ProgramState` class is our primary representation of
-program snapshots.
+ - `focaccia/compare.py`: The central algorithms that work on snapshots.
 
- - `compare.py`: The central algorithms that work on snapshots.
-
- - `parser.py`: Utilities for parsing logs from Arancini and QEMU, as well as serializing/deserializing to/from our own
-log format.
-
- - `arch/`: Abstractions over different processor architectures. Will be used to integrate support for more
+ - `focaccia/arch/`: Abstractions over different processor architectures. Will be used to integrate support for more
 architectures later. Currently, we only have X86.
 
-## Concolic execution
+### Concolic execution
 
 The following files belong to a prototype of a data-dependency generator based on symbolic
 execution:
 
- - `symbolic.py`: Algorithms and data structures to compute and manipulate symbolic program transformations. This
-handles the symbolic part of "concolic" execution.
+ - `focaccia/symbolic.py`: Algorithms and data structures to compute and manipulate symbolic program transformations.
+This handles the symbolic part of "concolic" execution.
 
- - `lldb_target.py`: Tools for executing a program concretely and tracking its execution using
+ - `focaccia/lldb_target.py`: Tools for executing a program concretely and tracking its execution using
 [LLDB](https://lldb.llvm.org/). This handles the concrete part of "concolic" execution.
 
- - `miasm_util.py`: Tools to evaluate Miasm's symbolic expressions based on a concrete state. Ties the symbolic and
-concrete parts together into "concolic" execution.
+ - `focaccia/miasm_util.py`: Tools to evaluate Miasm's symbolic expressions based on a concrete state. Ties the symbolic
+and concrete parts together into "concolic" execution.
+
+### Helpers
 
-## Helpers
+ - `focaccia/parser.py`: Utilities for parsing logs from Arancini and QEMU, as well as serializing/deserializing to/from
+our own log format.
 
  - `miasm_test.py`: A test script that traces a program concolically.
diff --git a/main.py b/focaccia.py
index 3167bbd..e140337 100755
--- a/main.py
+++ b/focaccia.py
@@ -4,14 +4,14 @@ import argparse
 import platform
 from typing import Iterable
 
-from arch import x86
-from compare import compare_simple, compare_symbolic, \
-                    ErrorSeverity, ErrorTypes
-from lldb_target import LLDBConcreteTarget
-from parser import parse_arancini
-from snapshot import ProgramState
-from symbolic import SymbolicTransform, collect_symbolic_trace
-from utils import check_version, print_separator
+from focaccia.arch import x86
+from focaccia.compare import compare_simple, compare_symbolic, \
+                             ErrorSeverity, ErrorTypes
+from focaccia.lldb_target import LLDBConcreteTarget
+from focaccia.parser import parse_arancini
+from focaccia.snapshot import ProgramState
+from focaccia.symbolic import SymbolicTransform, collect_symbolic_trace
+from focaccia.utils import print_separator
 
 def run_native_execution(oracle_program: str, breakpoints: Iterable[int]):
     """Gather snapshots from a native execution via an external debugger.
@@ -175,7 +175,7 @@ def main():
         assert(program is not None)
 
         print(f'Tracing {program} symbolically with arguments {prog_args}...')
-        transforms = collect_symbolic_trace(program, [program, *prog_args])
+        transforms = collect_symbolic_trace(program, prog_args)
         txl, transforms = match_traces(txl, transforms)
         result = compare_symbolic(txl, transforms)
     else:
@@ -183,6 +183,5 @@ def main():
 
     print_result(result, verbosity[args.error_level])
 
-if __name__ == "__main__":
-    check_version('3.7')
+if __name__ == '__main__':
     main()
diff --git a/focaccia/__init__.py b/focaccia/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/focaccia/__init__.py
diff --git a/arch/__init__.py b/focaccia/arch/__init__.py
index 2926d20..2926d20 100644
--- a/arch/__init__.py
+++ b/focaccia/arch/__init__.py
diff --git a/arch/arch.py b/focaccia/arch/arch.py
index f2be5cb..f2be5cb 100644
--- a/arch/arch.py
+++ b/focaccia/arch/arch.py
diff --git a/arch/x86.py b/focaccia/arch/x86.py
index 95e1a82..95e1a82 100644
--- a/arch/x86.py
+++ b/focaccia/arch/x86.py
diff --git a/compare.py b/focaccia/compare.py
index dfa5dbd..066127d 100644
--- a/compare.py
+++ b/focaccia/compare.py
@@ -1,8 +1,8 @@
 from functools import total_ordering
 from typing import Self
 
-from snapshot import ProgramState, MemoryAccessError
-from symbolic import SymbolicTransform
+from .snapshot import ProgramState, MemoryAccessError
+from .symbolic import SymbolicTransform
 
 @total_ordering
 class ErrorSeverity:
diff --git a/lldb_target.py b/focaccia/lldb_target.py
index b96f66b..146891f 100644
--- a/lldb_target.py
+++ b/focaccia/lldb_target.py
@@ -1,7 +1,7 @@
 import lldb
 
-from arch import supported_architectures, x86
-from snapshot import ProgramState
+from .arch import supported_architectures, x86
+from .snapshot import ProgramState
 
 class MemoryMap:
     """Description of a range of mapped memory.
@@ -30,9 +30,6 @@ class LLDBConcreteTarget:
     def __init__(self, executable: str, argv: list[str] = []):
         """Construct an LLDB concrete target. Stop at entry.
 
-        :param argv: The full argv array, including the executable's path as
-                     the first argument (as is convention).
-
         :raises RuntimeError: If the process is unable to launch.
         """
         self.debugger = lldb.SBDebugger.Create()
@@ -46,8 +43,10 @@ class LLDBConcreteTarget:
         self.error = lldb.SBError()
         self.listener = self.debugger.GetListener()
         self.process = self.target.Launch(self.listener,
-                                          argv, None, None,
-                                          None, None, None, 0,
+                                          argv, None,        # argv, envp
+                                          None, None, None,  # stdin, stdout, stderr
+                                          None,              # working directory
+                                          0,
                                           True, self.error)
         if not self.process.IsValid():
             raise RuntimeError(f'[In LLDBConcreteTarget.__init__]: Failed to'
diff --git a/miasm_util.py b/focaccia/miasm_util.py
index 0d3ab3d..d9a9936 100644
--- a/miasm_util.py
+++ b/focaccia/miasm_util.py
@@ -6,7 +6,7 @@ from miasm.expression.expression import Expr, ExprOp, ExprId, ExprLoc, \
                                         ExprSlice, ExprCond
 from miasm.expression.simplifications import expr_simp_explicit
 
-from snapshot import ProgramState
+from .snapshot import ProgramState
 
 def simp_segm(expr_simp, expr: ExprOp):
     """Simplify a segmentation expression to an addition of the segment
diff --git a/parser.py b/focaccia/parser.py
index 391d58a..8680eed 100644
--- a/parser.py
+++ b/focaccia/parser.py
@@ -5,8 +5,8 @@ import json
 import re
 from typing import TextIO
 
-from arch import supported_architectures, Arch
-from snapshot import ProgramState
+from .arch import supported_architectures, Arch
+from .snapshot import ProgramState
 
 class ParseError(Exception):
     """A parse error."""
@@ -139,18 +139,3 @@ def parse_arancini(stream: TextIO, arch: Arch) -> list[ProgramState]:
                 states[-1].set(regname, int(value, 16))
 
     return states
-
-if __name__ == "__main__":
-    from arch import x86
-    with open('qemu.log', 'r') as file:
-        states = parse_qemu(file, x86.ArchX86())
-        print(f'Parsed {len(states)} states from QEMU log.')
-    with open('dump.qemu', 'w') as file:
-        serialize_snapshots(states, file)
-
-    with open('emulator-log.txt', 'r') as file:
-        states = parse_arancini(file, x86.ArchX86())
-        print(f'Parsed {len(states)} states from Arancini log.')
-    with open('dump.arancini', 'w') as file:
-        serialize_snapshots(states, file)
-    exit(0)
diff --git a/snapshot.py b/focaccia/snapshot.py
index 9c9e4b3..0f10dda 100644
--- a/snapshot.py
+++ b/focaccia/snapshot.py
@@ -1,4 +1,4 @@
-from arch.arch import Arch
+from .arch.arch import Arch
 
 class MemoryAccessError(Exception):
     def __init__(self, addr: int, size: int, msg: str):
@@ -38,8 +38,8 @@ class SparseMemory:
             page_addr, off = self._to_page_addr_and_offset(addr)
             if page_addr not in self._pages:
                 raise MemoryAccessError(addr, size,
-                                        f'Address {addr} is not contained in'
-                                        f' the sparse memory.')
+                                        f'Address {hex(addr)} is not contained'
+                                        f' in the sparse memory.')
             data = self._pages[page_addr]
             assert(len(data) == self.page_size)
             read_size = min(size, self.page_size - off)
diff --git a/symbolic.py b/focaccia/symbolic.py
index 6e70bc9..e132ebd 100644
--- a/symbolic.py
+++ b/focaccia/symbolic.py
@@ -11,10 +11,12 @@ from miasm.ir.symbexec import SymbolicExecutionEngine
 from miasm.ir.ir import IRBlock
 from miasm.expression.expression import Expr, ExprId, ExprMem, ExprInt
 
-from lldb_target import LLDBConcreteTarget
-from miasm_util import MiasmConcreteState, eval_expr
-from snapshot import ProgramState
-from arch import Arch, supported_architectures
+from .arch import Arch, supported_architectures
+from .lldb_target import LLDBConcreteTarget, \
+                         ConcreteRegisterError, \
+                         ConcreteMemoryError
+from .miasm_util import MiasmConcreteState, eval_expr
+from .snapshot import ProgramState
 
 class SymbolicTransform:
     def __init__(self, from_addr: int, to_addr: int):
@@ -75,7 +77,7 @@ class MiasmSymbolicTransform(SymbolicTransform):
         class MiasmSymbolicState(MiasmConcreteState):
             """Drop-in replacement for MiasmConcreteState in eval_expr that
             returns the current transform's symbolic equations instead of
-            symbolic values. Calling eval_expr with this effectively nests the
+            concrete values. Calling eval_expr with this effectively nests the
             transformation into the concatenated transformation.
 
             We inherit from `MiasmSymbolicTransform` only for the purpose of
@@ -275,14 +277,48 @@ def _run_block(pc: int, conc_state: MiasmConcreteState, ctx: DisassemblyContext)
             # instructions are translated to multiple IR instructions.
             pass
 
+class LLDBConcreteState:
+    """A back-end replacement for the `ProgramState` object from which
+    `MiasmConcreteState` reads its values. This reads values directly from an
+    LLDB target instead. This saves us the trouble of recording a full program
+    state, and allows us instead to read values from LLDB on demand.
+    """
+    def __init__(self, target: LLDBConcreteTarget, arch: Arch):
+        self._target = target
+        self._arch = arch
+
+    def read(self, reg: str) -> int | None:
+        from focaccia.arch import x86
+
+        regname = self._arch.to_regname(reg)
+        if regname is None:
+            return None
+
+        try:
+            return self._target.read_register(regname)
+        except ConcreteRegisterError:
+            # Special case for X86
+            if self._arch.archname == x86.archname:
+                rflags = x86.decompose_rflags(self._target.read_register('rflags'))
+                if regname in rflags:
+                    return rflags[regname]
+            return None
+
+    def read_memory(self, addr: int, size: int):
+        try:
+            return self._target.read_memory(addr, size)
+        except ConcreteMemoryError:
+            return None
+
 def collect_symbolic_trace(binary: str,
-                           argv: list[str],
+                           args: list[str],
                            start_addr: int | None = None
                            ) -> list[SymbolicTransform]:
     """Execute a program and compute state transformations between executed
     instructions.
 
     :param binary: The binary to trace.
+    :param args:   Arguments to the program.
     """
     ctx = DisassemblyContext(binary)
 
@@ -298,16 +334,16 @@ def collect_symbolic_trace(binary: str,
     else:
         pc = start_addr
 
-    target = LLDBConcreteTarget(binary, argv)
+    target = LLDBConcreteTarget(binary, args)
     if target.read_register('pc') != pc:
         target.set_breakpoint(pc)
         target.run()
         target.remove_breakpoint(pc)
+    conc_state = LLDBConcreteState(target, arch)
 
     symb_trace = [] # The resulting list of symbolic transforms per instruction
 
     # Run until no more states can be reached
-    initial_state = target.record_snapshot()
     while pc is not None:
         assert(target.read_register('pc') == pc)
 
@@ -317,9 +353,8 @@ def collect_symbolic_trace(binary: str,
         try:
             pc, strace = _run_block(
                 pc,
-                MiasmConcreteState(initial_state, ctx.loc_db),
-                ctx
-            )
+                MiasmConcreteState(conc_state, ctx.loc_db),
+                ctx)
         except DisassemblyError as err:
             # This happens if we encounter an instruction that is not
             # implemented by Miasm. Try to skip that instruction and continue
@@ -339,7 +374,6 @@ def collect_symbolic_trace(binary: str,
 
             symb_trace.append((err.faulty_pc, {}))  # Generate empty transform
             pc = target.read_register('pc')
-            initial_state = target.record_snapshot()
             continue
 
         if pc is None:
@@ -358,17 +392,14 @@ def collect_symbolic_trace(binary: str,
         symb_trace.extend(strace)
 
         # Use this for extensive trace debugging
-        if [a for a, _ in strace] != ctrace:
-            print(f'[WARNING] Symbolic trace and concrete trace are not equal!'
-                  f'\n    symbolic: {[hex(a) for a, _ in strace]}'
-                  f'\n    concrete: {[hex(a) for a in ctrace]}')
+        #if [a for a, _ in strace] != ctrace:
+        #    print(f'[WARNING] Symbolic trace and concrete trace are not equal!'
+        #          f'\n    symbolic: {[hex(a) for a, _ in strace]}'
+        #          f'\n    concrete: {[hex(a) for a in ctrace]}')
 
         if target.is_exited():
             break
 
-        # Query the new reference state for symbolic execution
-        initial_state = target.record_snapshot()
-
     res = []
     for (start, diff), (end, _) in zip(symb_trace[:-1], symb_trace[1:]):
         res.append(MiasmSymbolicTransform(diff, arch, start, end))
diff --git a/focaccia/utils.py b/focaccia/utils.py
new file mode 100644
index 0000000..0c6f292
--- /dev/null
+++ b/focaccia/utils.py
@@ -0,0 +1,7 @@
+import sys
+import shutil
+
+def print_separator(separator: str = '-', stream=sys.stdout, count: int = 80):
+    maxtermsize = count
+    termsize = shutil.get_terminal_size((80, 20)).columns
+    print(separator * min(termsize, maxtermsize), file=stream)
diff --git a/miasm_test.py b/miasm_test.py
index 97c23de..8d5bd9a 100644
--- a/miasm_test.py
+++ b/miasm_test.py
@@ -1,6 +1,6 @@
 import argparse
 
-from symbolic import collect_symbolic_trace
+from focaccia.symbolic import collect_symbolic_trace
 
 def main():
     program = argparse.ArgumentParser()
@@ -21,7 +21,7 @@ def main():
             print(f'Start address must be a hexadecimal number. Exiting.')
             exit(1)
 
-    strace = collect_symbolic_trace(binary, [binary, *argv], pc)
+    strace = collect_symbolic_trace(binary, argv, pc)
 
     print(f'--- {len(strace)} instructions traced.')
     print(f'--- No new PC found. Exiting.')
diff --git a/test/test_sparse_memory.py b/test/test_sparse_memory.py
index 87b4456..4fd9cba 100644
--- a/test/test_sparse_memory.py
+++ b/test/test_sparse_memory.py
@@ -1,6 +1,6 @@
 import unittest
 
-from snapshot import SparseMemory, MemoryAccessError
+from focaccia.snapshot import SparseMemory, MemoryAccessError
 
 class TestSparseMemory(unittest.TestCase):
     def test_oob_read(self):
diff --git a/tools/qemu_tool.py b/tools/qemu_tool.py
index d5f78af..c7730fd 100644
--- a/tools/qemu_tool.py
+++ b/tools/qemu_tool.py
@@ -9,10 +9,10 @@ import shlex
 import subprocess
 from typing import TextIO
 
-import parser
-from arch import x86
-from lldb_target import MemoryMap
-from snapshot import ProgramState
+import focaccia.parser as parser
+from focaccia.arch import x86
+from focaccia.lldb_target import MemoryMap
+from focaccia.snapshot import ProgramState
 
 def parse_memory_maps(stream: TextIO) -> tuple[list[MemoryMap], str]:
     """
diff --git a/utils.py b/utils.py
deleted file mode 100644
index f2c2256..0000000
--- a/utils.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import sys
-import shutil
-
-def print_separator(separator: str = '-', stream=sys.stdout, count: int = 80):
-    maxtermsize = count
-    termsize = shutil.get_terminal_size((80, 20)).columns
-    print(separator * min(termsize, maxtermsize), file=stream)
-
-def check_version(version: str):
-    # Script depends on ordered dicts in default dict()
-    split = version.split('.')
-    major = int(split[0])
-    minor = int(split[1])
-    if sys.version_info.major < major and sys.version_info.minor < minor:
-        raise EnvironmentError("Expected at least Python 3.7")
-
-def to_str(expr):
-    """Convert a claripy expression to a nice string representation.
-
-    Actually, the resulting representation is not very nice at all. It mostly
-    serves debugging purposes.
-    """
-    import claripy
-
-    if not issubclass(type(expr), claripy.ast.Base):
-        return f'{type(expr)}[{str(expr)}]'
-
-    assert(expr.depth > 0)
-    if expr.depth == 1:
-        if expr.symbolic:
-            name = expr._encoded_name.decode()
-            return f'symbol[{name}]'
-        else:
-            assert(expr.concrete)
-            return f'value{expr.length}[{hex(expr.v)}]'
-
-    args = [to_str(child) for child in expr.args]
-    return f'expr[{str(expr.op)}({", ".join(args)})]'