diff options
| -rw-r--r-- | README.md | 30 | ||||
| -rwxr-xr-x | focaccia.py (renamed from main.py) | 21 | ||||
| -rw-r--r-- | focaccia/__init__.py | 0 | ||||
| -rw-r--r-- | focaccia/arch/__init__.py (renamed from arch/__init__.py) | 0 | ||||
| -rw-r--r-- | focaccia/arch/arch.py (renamed from arch/arch.py) | 0 | ||||
| -rw-r--r-- | focaccia/arch/x86.py (renamed from arch/x86.py) | 0 | ||||
| -rw-r--r-- | focaccia/compare.py (renamed from compare.py) | 4 | ||||
| -rw-r--r-- | focaccia/lldb_target.py (renamed from lldb_target.py) | 13 | ||||
| -rw-r--r-- | focaccia/miasm_util.py (renamed from miasm_util.py) | 2 | ||||
| -rw-r--r-- | focaccia/parser.py (renamed from parser.py) | 19 | ||||
| -rw-r--r-- | focaccia/snapshot.py (renamed from snapshot.py) | 6 | ||||
| -rw-r--r-- | focaccia/symbolic.py (renamed from symbolic.py) | 69 | ||||
| -rw-r--r-- | focaccia/utils.py | 7 | ||||
| -rw-r--r-- | miasm_test.py | 4 | ||||
| -rw-r--r-- | test/test_sparse_memory.py | 2 | ||||
| -rw-r--r-- | tools/qemu_tool.py | 8 | ||||
| -rw-r--r-- | utils.py | 38 |
17 files changed, 102 insertions, 121 deletions
diff --git a/README.md b/README.md index 4200b8c..82918b0 100644 --- a/README.md +++ b/README.md @@ -38,33 +38,31 @@ The `tools/` directory contains additional utility scripts to work with focaccia The following files belong to a rough framework for the snapshot comparison engine: - - `main.py`: Entry point to the tool. Handling of command line arguments, pre-processing of input logs, etc. + - `focaccia/snapshot.py`: Structures used to work with snapshots. The `ProgramState` class is our primary +representation of program snapshots. - - `snapshot.py`: Structures used to work with snapshots. The `ProgramState` class is our primary representation of -program snapshots. + - `focaccia/compare.py`: The central algorithms that work on snapshots. - - `compare.py`: The central algorithms that work on snapshots. - - - `parser.py`: Utilities for parsing logs from Arancini and QEMU, as well as serializing/deserializing to/from our own -log format. - - - `arch/`: Abstractions over different processor architectures. Will be used to integrate support for more + - `focaccia/arch/`: Abstractions over different processor architectures. Will be used to integrate support for more architectures later. Currently, we only have X86. -## Concolic execution +### Concolic execution The following files belong to a prototype of a data-dependency generator based on symbolic execution: - - `symbolic.py`: Algorithms and data structures to compute and manipulate symbolic program transformations. This -handles the symbolic part of "concolic" execution. + - `focaccia/symbolic.py`: Algorithms and data structures to compute and manipulate symbolic program transformations. +This handles the symbolic part of "concolic" execution. - - `lldb_target.py`: Tools for executing a program concretely and tracking its execution using + - `focaccia/lldb_target.py`: Tools for executing a program concretely and tracking its execution using [LLDB](https://lldb.llvm.org/). This handles the concrete part of "concolic" execution. - - `miasm_util.py`: Tools to evaluate Miasm's symbolic expressions based on a concrete state. Ties the symbolic and -concrete parts together into "concolic" execution. + - `focaccia/miasm_util.py`: Tools to evaluate Miasm's symbolic expressions based on a concrete state. Ties the symbolic +and concrete parts together into "concolic" execution. + +### Helpers -## Helpers + - `focaccia/parser.py`: Utilities for parsing logs from Arancini and QEMU, as well as serializing/deserializing to/from +our own log format. - `miasm_test.py`: A test script that traces a program concolically. diff --git a/main.py b/focaccia.py index 3167bbd..e140337 100755 --- a/main.py +++ b/focaccia.py @@ -4,14 +4,14 @@ import argparse import platform from typing import Iterable -from arch import x86 -from compare import compare_simple, compare_symbolic, \ - ErrorSeverity, ErrorTypes -from lldb_target import LLDBConcreteTarget -from parser import parse_arancini -from snapshot import ProgramState -from symbolic import SymbolicTransform, collect_symbolic_trace -from utils import check_version, print_separator +from focaccia.arch import x86 +from focaccia.compare import compare_simple, compare_symbolic, \ + ErrorSeverity, ErrorTypes +from focaccia.lldb_target import LLDBConcreteTarget +from focaccia.parser import parse_arancini +from focaccia.snapshot import ProgramState +from focaccia.symbolic import SymbolicTransform, collect_symbolic_trace +from focaccia.utils import print_separator def run_native_execution(oracle_program: str, breakpoints: Iterable[int]): """Gather snapshots from a native execution via an external debugger. @@ -175,7 +175,7 @@ def main(): assert(program is not None) print(f'Tracing {program} symbolically with arguments {prog_args}...') - transforms = collect_symbolic_trace(program, [program, *prog_args]) + transforms = collect_symbolic_trace(program, prog_args) txl, transforms = match_traces(txl, transforms) result = compare_symbolic(txl, transforms) else: @@ -183,6 +183,5 @@ def main(): print_result(result, verbosity[args.error_level]) -if __name__ == "__main__": - check_version('3.7') +if __name__ == '__main__': main() diff --git a/focaccia/__init__.py b/focaccia/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/focaccia/__init__.py diff --git a/arch/__init__.py b/focaccia/arch/__init__.py index 2926d20..2926d20 100644 --- a/arch/__init__.py +++ b/focaccia/arch/__init__.py diff --git a/arch/arch.py b/focaccia/arch/arch.py index f2be5cb..f2be5cb 100644 --- a/arch/arch.py +++ b/focaccia/arch/arch.py diff --git a/arch/x86.py b/focaccia/arch/x86.py index 95e1a82..95e1a82 100644 --- a/arch/x86.py +++ b/focaccia/arch/x86.py diff --git a/compare.py b/focaccia/compare.py index dfa5dbd..066127d 100644 --- a/compare.py +++ b/focaccia/compare.py @@ -1,8 +1,8 @@ from functools import total_ordering from typing import Self -from snapshot import ProgramState, MemoryAccessError -from symbolic import SymbolicTransform +from .snapshot import ProgramState, MemoryAccessError +from .symbolic import SymbolicTransform @total_ordering class ErrorSeverity: diff --git a/lldb_target.py b/focaccia/lldb_target.py index b96f66b..146891f 100644 --- a/lldb_target.py +++ b/focaccia/lldb_target.py @@ -1,7 +1,7 @@ import lldb -from arch import supported_architectures, x86 -from snapshot import ProgramState +from .arch import supported_architectures, x86 +from .snapshot import ProgramState class MemoryMap: """Description of a range of mapped memory. @@ -30,9 +30,6 @@ class LLDBConcreteTarget: def __init__(self, executable: str, argv: list[str] = []): """Construct an LLDB concrete target. Stop at entry. - :param argv: The full argv array, including the executable's path as - the first argument (as is convention). - :raises RuntimeError: If the process is unable to launch. """ self.debugger = lldb.SBDebugger.Create() @@ -46,8 +43,10 @@ class LLDBConcreteTarget: self.error = lldb.SBError() self.listener = self.debugger.GetListener() self.process = self.target.Launch(self.listener, - argv, None, None, - None, None, None, 0, + argv, None, # argv, envp + None, None, None, # stdin, stdout, stderr + None, # working directory + 0, True, self.error) if not self.process.IsValid(): raise RuntimeError(f'[In LLDBConcreteTarget.__init__]: Failed to' diff --git a/miasm_util.py b/focaccia/miasm_util.py index 0d3ab3d..d9a9936 100644 --- a/miasm_util.py +++ b/focaccia/miasm_util.py @@ -6,7 +6,7 @@ from miasm.expression.expression import Expr, ExprOp, ExprId, ExprLoc, \ ExprSlice, ExprCond from miasm.expression.simplifications import expr_simp_explicit -from snapshot import ProgramState +from .snapshot import ProgramState def simp_segm(expr_simp, expr: ExprOp): """Simplify a segmentation expression to an addition of the segment diff --git a/parser.py b/focaccia/parser.py index 391d58a..8680eed 100644 --- a/parser.py +++ b/focaccia/parser.py @@ -5,8 +5,8 @@ import json import re from typing import TextIO -from arch import supported_architectures, Arch -from snapshot import ProgramState +from .arch import supported_architectures, Arch +from .snapshot import ProgramState class ParseError(Exception): """A parse error.""" @@ -139,18 +139,3 @@ def parse_arancini(stream: TextIO, arch: Arch) -> list[ProgramState]: states[-1].set(regname, int(value, 16)) return states - -if __name__ == "__main__": - from arch import x86 - with open('qemu.log', 'r') as file: - states = parse_qemu(file, x86.ArchX86()) - print(f'Parsed {len(states)} states from QEMU log.') - with open('dump.qemu', 'w') as file: - serialize_snapshots(states, file) - - with open('emulator-log.txt', 'r') as file: - states = parse_arancini(file, x86.ArchX86()) - print(f'Parsed {len(states)} states from Arancini log.') - with open('dump.arancini', 'w') as file: - serialize_snapshots(states, file) - exit(0) diff --git a/snapshot.py b/focaccia/snapshot.py index 9c9e4b3..0f10dda 100644 --- a/snapshot.py +++ b/focaccia/snapshot.py @@ -1,4 +1,4 @@ -from arch.arch import Arch +from .arch.arch import Arch class MemoryAccessError(Exception): def __init__(self, addr: int, size: int, msg: str): @@ -38,8 +38,8 @@ class SparseMemory: page_addr, off = self._to_page_addr_and_offset(addr) if page_addr not in self._pages: raise MemoryAccessError(addr, size, - f'Address {addr} is not contained in' - f' the sparse memory.') + f'Address {hex(addr)} is not contained' + f' in the sparse memory.') data = self._pages[page_addr] assert(len(data) == self.page_size) read_size = min(size, self.page_size - off) diff --git a/symbolic.py b/focaccia/symbolic.py index 6e70bc9..e132ebd 100644 --- a/symbolic.py +++ b/focaccia/symbolic.py @@ -11,10 +11,12 @@ from miasm.ir.symbexec import SymbolicExecutionEngine from miasm.ir.ir import IRBlock from miasm.expression.expression import Expr, ExprId, ExprMem, ExprInt -from lldb_target import LLDBConcreteTarget -from miasm_util import MiasmConcreteState, eval_expr -from snapshot import ProgramState -from arch import Arch, supported_architectures +from .arch import Arch, supported_architectures +from .lldb_target import LLDBConcreteTarget, \ + ConcreteRegisterError, \ + ConcreteMemoryError +from .miasm_util import MiasmConcreteState, eval_expr +from .snapshot import ProgramState class SymbolicTransform: def __init__(self, from_addr: int, to_addr: int): @@ -75,7 +77,7 @@ class MiasmSymbolicTransform(SymbolicTransform): class MiasmSymbolicState(MiasmConcreteState): """Drop-in replacement for MiasmConcreteState in eval_expr that returns the current transform's symbolic equations instead of - symbolic values. Calling eval_expr with this effectively nests the + concrete values. Calling eval_expr with this effectively nests the transformation into the concatenated transformation. We inherit from `MiasmSymbolicTransform` only for the purpose of @@ -275,14 +277,48 @@ def _run_block(pc: int, conc_state: MiasmConcreteState, ctx: DisassemblyContext) # instructions are translated to multiple IR instructions. pass +class LLDBConcreteState: + """A back-end replacement for the `ProgramState` object from which + `MiasmConcreteState` reads its values. This reads values directly from an + LLDB target instead. This saves us the trouble of recording a full program + state, and allows us instead to read values from LLDB on demand. + """ + def __init__(self, target: LLDBConcreteTarget, arch: Arch): + self._target = target + self._arch = arch + + def read(self, reg: str) -> int | None: + from focaccia.arch import x86 + + regname = self._arch.to_regname(reg) + if regname is None: + return None + + try: + return self._target.read_register(regname) + except ConcreteRegisterError: + # Special case for X86 + if self._arch.archname == x86.archname: + rflags = x86.decompose_rflags(self._target.read_register('rflags')) + if regname in rflags: + return rflags[regname] + return None + + def read_memory(self, addr: int, size: int): + try: + return self._target.read_memory(addr, size) + except ConcreteMemoryError: + return None + def collect_symbolic_trace(binary: str, - argv: list[str], + args: list[str], start_addr: int | None = None ) -> list[SymbolicTransform]: """Execute a program and compute state transformations between executed instructions. :param binary: The binary to trace. + :param args: Arguments to the program. """ ctx = DisassemblyContext(binary) @@ -298,16 +334,16 @@ def collect_symbolic_trace(binary: str, else: pc = start_addr - target = LLDBConcreteTarget(binary, argv) + target = LLDBConcreteTarget(binary, args) if target.read_register('pc') != pc: target.set_breakpoint(pc) target.run() target.remove_breakpoint(pc) + conc_state = LLDBConcreteState(target, arch) symb_trace = [] # The resulting list of symbolic transforms per instruction # Run until no more states can be reached - initial_state = target.record_snapshot() while pc is not None: assert(target.read_register('pc') == pc) @@ -317,9 +353,8 @@ def collect_symbolic_trace(binary: str, try: pc, strace = _run_block( pc, - MiasmConcreteState(initial_state, ctx.loc_db), - ctx - ) + MiasmConcreteState(conc_state, ctx.loc_db), + ctx) except DisassemblyError as err: # This happens if we encounter an instruction that is not # implemented by Miasm. Try to skip that instruction and continue @@ -339,7 +374,6 @@ def collect_symbolic_trace(binary: str, symb_trace.append((err.faulty_pc, {})) # Generate empty transform pc = target.read_register('pc') - initial_state = target.record_snapshot() continue if pc is None: @@ -358,17 +392,14 @@ def collect_symbolic_trace(binary: str, symb_trace.extend(strace) # Use this for extensive trace debugging - if [a for a, _ in strace] != ctrace: - print(f'[WARNING] Symbolic trace and concrete trace are not equal!' - f'\n symbolic: {[hex(a) for a, _ in strace]}' - f'\n concrete: {[hex(a) for a in ctrace]}') + #if [a for a, _ in strace] != ctrace: + # print(f'[WARNING] Symbolic trace and concrete trace are not equal!' + # f'\n symbolic: {[hex(a) for a, _ in strace]}' + # f'\n concrete: {[hex(a) for a in ctrace]}') if target.is_exited(): break - # Query the new reference state for symbolic execution - initial_state = target.record_snapshot() - res = [] for (start, diff), (end, _) in zip(symb_trace[:-1], symb_trace[1:]): res.append(MiasmSymbolicTransform(diff, arch, start, end)) diff --git a/focaccia/utils.py b/focaccia/utils.py new file mode 100644 index 0000000..0c6f292 --- /dev/null +++ b/focaccia/utils.py @@ -0,0 +1,7 @@ +import sys +import shutil + +def print_separator(separator: str = '-', stream=sys.stdout, count: int = 80): + maxtermsize = count + termsize = shutil.get_terminal_size((80, 20)).columns + print(separator * min(termsize, maxtermsize), file=stream) diff --git a/miasm_test.py b/miasm_test.py index 97c23de..8d5bd9a 100644 --- a/miasm_test.py +++ b/miasm_test.py @@ -1,6 +1,6 @@ import argparse -from symbolic import collect_symbolic_trace +from focaccia.symbolic import collect_symbolic_trace def main(): program = argparse.ArgumentParser() @@ -21,7 +21,7 @@ def main(): print(f'Start address must be a hexadecimal number. Exiting.') exit(1) - strace = collect_symbolic_trace(binary, [binary, *argv], pc) + strace = collect_symbolic_trace(binary, argv, pc) print(f'--- {len(strace)} instructions traced.') print(f'--- No new PC found. Exiting.') diff --git a/test/test_sparse_memory.py b/test/test_sparse_memory.py index 87b4456..4fd9cba 100644 --- a/test/test_sparse_memory.py +++ b/test/test_sparse_memory.py @@ -1,6 +1,6 @@ import unittest -from snapshot import SparseMemory, MemoryAccessError +from focaccia.snapshot import SparseMemory, MemoryAccessError class TestSparseMemory(unittest.TestCase): def test_oob_read(self): diff --git a/tools/qemu_tool.py b/tools/qemu_tool.py index d5f78af..c7730fd 100644 --- a/tools/qemu_tool.py +++ b/tools/qemu_tool.py @@ -9,10 +9,10 @@ import shlex import subprocess from typing import TextIO -import parser -from arch import x86 -from lldb_target import MemoryMap -from snapshot import ProgramState +import focaccia.parser as parser +from focaccia.arch import x86 +from focaccia.lldb_target import MemoryMap +from focaccia.snapshot import ProgramState def parse_memory_maps(stream: TextIO) -> tuple[list[MemoryMap], str]: """ diff --git a/utils.py b/utils.py deleted file mode 100644 index f2c2256..0000000 --- a/utils.py +++ /dev/null @@ -1,38 +0,0 @@ -import sys -import shutil - -def print_separator(separator: str = '-', stream=sys.stdout, count: int = 80): - maxtermsize = count - termsize = shutil.get_terminal_size((80, 20)).columns - print(separator * min(termsize, maxtermsize), file=stream) - -def check_version(version: str): - # Script depends on ordered dicts in default dict() - split = version.split('.') - major = int(split[0]) - minor = int(split[1]) - if sys.version_info.major < major and sys.version_info.minor < minor: - raise EnvironmentError("Expected at least Python 3.7") - -def to_str(expr): - """Convert a claripy expression to a nice string representation. - - Actually, the resulting representation is not very nice at all. It mostly - serves debugging purposes. - """ - import claripy - - if not issubclass(type(expr), claripy.ast.Base): - return f'{type(expr)}[{str(expr)}]' - - assert(expr.depth > 0) - if expr.depth == 1: - if expr.symbolic: - name = expr._encoded_name.decode() - return f'symbol[{name}]' - else: - assert(expr.concrete) - return f'value{expr.length}[{hex(expr.v)}]' - - args = [to_str(child) for child in expr.args] - return f'expr[{str(expr.op)}({", ".join(args)})]' |