Refactor project structure

Read concrete state on demand during concolic exec During concolic tracing, don't record full program snapshots at each basic block, but instead read concrete values directly from the concrete target when they are needed.
author: Theofilos Augoustis <theofilos.augoustis@gmail.com> 2023-12-31 18:29:31 +0100
committer: Theofilos Augoustis <theofilos.augoustis@gmail.com> 2023-12-31 18:29:31 +0100
commit: eae0b3b08bd078ad2f621ce2ef201e656da3f16a (patch)
tree: eb93252f39543c46146297264ff548d9925178e0 /focaccia.py
parent: d26ae0a7d583da5034cd6271f953b6253119ceae (diff)
download: focaccia-eae0b3b08bd078ad2f621ce2ef201e656da3f16a.tar.gz
focaccia-eae0b3b08bd078ad2f621ce2ef201e656da3f16a.zip
1 files changed, 187 insertions, 0 deletions
diff --git a/focaccia.py b/focaccia.py
new file mode 100755
index 0000000..e140337
--- /dev/null
+++ b/focaccia.py
@@ -0,0 +1,187 @@
+#! /bin/python3
+
+import argparse
+import platform
+from typing import Iterable
+
+from focaccia.arch import x86
+from focaccia.compare import compare_simple, compare_symbolic, \
+                             ErrorSeverity, ErrorTypes
+from focaccia.lldb_target import LLDBConcreteTarget
+from focaccia.parser import parse_arancini
+from focaccia.snapshot import ProgramState
+from focaccia.symbolic import SymbolicTransform, collect_symbolic_trace
+from focaccia.utils import print_separator
+
+def run_native_execution(oracle_program: str, breakpoints: Iterable[int]):
+    """Gather snapshots from a native execution via an external debugger.
+
+    :param oracle_program: Program to execute.
+    :param breakpoints: List of addresses at which to break and record the
+                        program's state.
+
+    :return: A list of snapshots gathered from the execution.
+    """
+    assert(platform.machine() == "x86_64")
+
+    target = LLDBConcreteTarget(oracle_program)
+
+    # Set breakpoints
+    for address in breakpoints:
+        target.set_breakpoint(address)
+
+    # Execute the native program
+    snapshots = []
+    while not target.is_exited():
+        snapshots.append(target.record_snapshot())
+        target.run()
+
+    return snapshots
+
+def match_traces(test: list[ProgramState], truth: list[SymbolicTransform]):
+    if not test or not truth:
+        return [], []
+
+    assert(test[0].read('pc') == truth[0].addr)
+
+    def index(seq, target, access=lambda el: el):
+        for i, el in enumerate(seq):
+            if access(el) == target:
+                return i
+        return None
+
+    i = 0
+    for next_state in test[1:]:
+        next_pc = next_state.read('pc')
+        index_in_truth = index(truth[i:], next_pc, lambda el: el.range[1])
+
+        # If no next element (i.e. no foldable range) is found in the truth
+        # trace, assume that the test trace contains excess states. Remove one
+        # and try again. This might skip testing some states, but covers more
+        # of the entire trace.
+        if index_in_truth is None:
+            test.pop(i + 1)
+            continue
+
+        # Fold the range of truth states until the next test state
+        for _ in range(index_in_truth):
+            truth[i].concat(truth.pop(i + 1))
+
+        assert(truth[i].range[1] == truth[i + 1].addr)
+
+        i += 1
+        if len(truth) <= i:
+            break
+
+    return test, truth
+
+def parse_inputs(txl_path, program):
+    # Our architecture
+    arch = x86.ArchX86()
+
+    with open(txl_path, "r") as txl_file:
+        txl = parse_arancini(txl_file, arch)
+
+    with open(txl_path, "r") as txl_file:
+        breakpoints = [state.read('PC') for state in txl]
+        ref = run_native_execution(program, breakpoints)
+
+    return txl, ref
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference')
+    parser.add_argument('-p', '--program',
+                        type=str,
+                        required=True,
+                        help='Path to oracle program')
+    parser.add_argument('-a', '--program-arg',
+                        type=str,
+                        required=False,
+                        default=[],
+                        action='append',
+                        help='Arguments to the program specified with --program.')
+    parser.add_argument('-t', '--txl',
+                        type=str,
+                        required=True,
+                        help='Path to the translation log (gathered via Arancini)')
+    parser.add_argument('--symbolic',
+                        action='store_true',
+                        default=False,
+                        help='Use an advanced algorithm that uses symbolic'
+                             ' execution to determine accurate data'
+                             ' transformations. This improves the quality of'
+                             ' generated errors significantly, but may take'
+                             ' more time to run.')
+    parser.add_argument('--error-level',
+                        type=str,
+                        default='verbose',
+                        choices=['verbose', 'errors', 'restricted'],
+                        help='Verbosity of reported errors. \'errors\' reports'
+                             ' everything that might be an error in the'
+                             ' translation, while \'verbose\' may report'
+                             ' additional errors from incomplete input'
+                             ' data, etc. [Default: verbose]')
+    args = parser.parse_args()
+    return args
+
+def print_result(result, min_severity: ErrorSeverity):
+    shown = 0
+    suppressed = 0
+
+    for res in result:
+        pc = res['pc']
+        print_separator()
+        print(f'For PC={hex(pc)}')
+        print_separator()
+
+        # Filter errors by severity
+        errs = [e for e in res['errors'] if e.severity >= min_severity]
+        suppressed += len(res['errors']) - len(errs)
+        shown += len(errs)
+
+        # Print all non-suppressed errors
+        for n, err in enumerate(errs, start=1):
+            print(f' {n:2}. {err}')
+
+        if errs:
+            print()
+            print(f'Expected transformation: {res["ref"]}')
+            print(f'Actual transformation:   {res["txl"]}')
+        else:
+            print('No errors found.')
+
+    print()
+    print('#' * 60)
+    print(f'Found {shown} errors.')
+    print(f'Suppressed {suppressed} low-priority errors'
+          f' (showing {min_severity} and higher).')
+    print('#' * 60)
+    print()
+
+def main():
+    verbosity = {
+        'verbose': ErrorTypes.INFO,
+        'errors': ErrorTypes.POSSIBLE,
+        'restricted': ErrorTypes.CONFIRMED,
+    }
+    args = parse_arguments()
+
+    txl_path = args.txl
+    program = args.program
+    prog_args = args.program_arg
+    txl, ref = parse_inputs(txl_path, program)
+
+    if args.symbolic:
+        assert(program is not None)
+
+        print(f'Tracing {program} symbolically with arguments {prog_args}...')
+        transforms = collect_symbolic_trace(program, prog_args)
+        txl, transforms = match_traces(txl, transforms)
+        result = compare_symbolic(txl, transforms)
+    else:
+        result = compare_simple(txl, ref)
+
+    print_result(result, verbosity[args.error_level])
+
+if __name__ == '__main__':
+    main()
author	Theofilos Augoustis <theofilos.augoustis@gmail.com>	2023-12-31 18:29:31 +0100
committer	Theofilos Augoustis <theofilos.augoustis@gmail.com>	2023-12-31 18:29:31 +0100
commit	eae0b3b08bd078ad2f621ce2ef201e656da3f16a (patch)
tree	eb93252f39543c46146297264ff548d9925178e0 /focaccia.py
parent	d26ae0a7d583da5034cd6271f953b6253119ceae (diff)
download	focaccia-eae0b3b08bd078ad2f621ce2ef201e656da3f16a.tar.gz focaccia-eae0b3b08bd078ad2f621ce2ef201e656da3f16a.zip