Implement symbolic comparison and match traces via Miasm

Co-authored-by: Theofilos Augoustis <theofilos.augoustis@gmail.com> Co-authored-by: Nicola Crivellin <nicola.crivellin98@gmail.com>
author: Theofilos Augoustis <theofilos.augoustis@gmail.com> 2023-12-14 17:03:59 +0100
committer: Theofilos Augoustis <theofilos.augoustis@gmail.com> 2023-12-14 17:03:59 +0100
commit: 194f3d6f2ebdc7b0631fdaaeb8451142b052ccb0 (patch)
tree: 20806871433db331bdfed66ddadfadecbea2b7c4 /main.py
parent: 4a5584d8f69d8ff511285387971d8cbf803f16b7 (diff)
download: focaccia-194f3d6f2ebdc7b0631fdaaeb8451142b052ccb0.tar.gz
focaccia-194f3d6f2ebdc7b0631fdaaeb8451142b052ccb0.zip
1 files changed, 62 insertions, 67 deletions
diff --git a/main.py b/main.py
index fabb05b..a51ecf7 100755
--- a/main.py
+++ b/main.py
@@ -4,11 +4,12 @@ import argparse
 import platform
 from typing import Iterable
 
-import arancini
 from arch import x86
 from compare import compare_simple, compare_symbolic
-from lldb_target import LLDBConcreteTarget, record_snapshot
-from symbolic import collect_symbolic_trace
+from lldb_target import LLDBConcreteTarget
+from parser import parse_arancini
+from snapshot import ProgramState
+from symbolic import SymbolicTransform, collect_symbolic_trace
 from utils import check_version, print_separator
 
 def run_native_execution(oracle_program: str, breakpoints: Iterable[int]):
@@ -31,28 +32,58 @@ def run_native_execution(oracle_program: str, breakpoints: Iterable[int]):
     # Execute the native program
     snapshots = []
     while not target.is_exited():
-        snapshots.append(record_snapshot(target))
+        snapshots.append(target.record_snapshot())
         target.run()
 
     return snapshots
 
-def parse_inputs(txl_path, ref_path, program):
+def match_traces(test: list[ProgramState], truth: list[SymbolicTransform]):
+    if not test or not truth:
+        return [], []
+
+    assert(test[0].read('pc') == truth[0].addr)
+
+    def index(seq, target, access=lambda el: el):
+        for i, el in enumerate(seq):
+            if access(el) == target:
+                return i
+        return None
+
+    i = 0
+    for next_state in test[1:]:
+        next_pc = next_state.read('pc')
+        index_in_truth = index(truth[i:], next_pc, lambda el: el.range[1])
+
+        # If no next element (i.e. no foldable range) is found in the truth
+        # trace, assume that the test trace contains excess states. Remove one
+        # and try again. This might skip testing some states, but covers more
+        # of the entire trace.
+        if index_in_truth is None:
+            test.pop(i + 1)
+            continue
+
+        # Fold the range of truth states until the next test state
+        for _ in range(index_in_truth):
+            truth[i].concat(truth.pop(i + 1))
+
+        assert(truth[i].range[1] == truth[i + 1].addr)
+
+        i += 1
+        if len(truth) <= i:
+            break
+
+    return test, truth
+
+def parse_inputs(txl_path, program):
     # Our architecture
     arch = x86.ArchX86()
 
-    txl = []
     with open(txl_path, "r") as txl_file:
-        txl = arancini.parse(txl_file.readlines(), arch)
+        txl = parse_arancini(txl_file, arch)
 
-    ref = []
-    if program is not None:
-        with open(txl_path, "r") as txl_file:
-            breakpoints = arancini.parse_break_addresses(txl_file.readlines())
+    with open(txl_path, "r") as txl_file:
+        breakpoints = [state.read('PC') for state in txl]
         ref = run_native_execution(program, breakpoints)
-    else:
-        assert(ref_path is not None)
-        with open(ref_path, "r") as native_file:
-            ref = arancini.parse(native_file.readlines(), arch)
 
     return txl, ref
 
@@ -60,23 +91,18 @@ def parse_arguments():
     parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference')
     parser.add_argument('-p', '--program',
                         type=str,
+                        required=True,
                         help='Path to oracle program')
-    parser.add_argument('-r', '--ref',
+    parser.add_argument('-a', '--program-arg',
                         type=str,
-                        required=True,
-                        help='Path to the reference log (gathered with run.sh)')
+                        required=False,
+                        default=[],
+                        action='append',
+                        help='Arguments to the program specified with --program.')
     parser.add_argument('-t', '--txl',
                         type=str,
                         required=True,
                         help='Path to the translation log (gathered via Arancini)')
-    parser.add_argument('-s', '--stats',
-                        action='store_true',
-                        default=False,
-                        help='Run statistics on comparisons')
-    parser.add_argument('-v', '--verbose',
-                        action='store_true',
-                        default=True,
-                        help='Path to oracle program')
     parser.add_argument('--symbolic',
                         action='store_true',
                         default=False,
@@ -90,46 +116,17 @@ def main():
     args = parse_arguments()
 
     txl_path = args.txl
-    reference_path = args.ref
     program = args.program
-
-    stats = args.stats
-    verbose = args.verbose
-
-    if verbose:
-        print("Enabling verbose program output")
-        print(f"Verbose: {verbose}")
-        print(f"Statistics: {stats}")
-        print(f"Symbolic: {args.symbolic}")
-
-    if program is None and reference_path is None:
-        raise ValueError('Either program or path to native file must be'
-                         'provided')
-
-    txl, ref = parse_inputs(txl_path, reference_path, program)
-
-    if program != None and reference_path != None:
-        with open(reference_path, 'w') as w:
-            for snapshot in ref:
-                print(snapshot, file=w)
+    prog_args = args.program_arg
+    txl, ref = parse_inputs(txl_path, program)
 
     if args.symbolic:
         assert(program is not None)
 
-        transforms = collect_symbolic_trace(program, [program])
-
-        new = transforms[0] \
-            .concat(transforms[1]) \
-            .concat(transforms[2]) \
-            .concat(transforms[3]) \
-            .concat(transforms[4])
-        print(f'New transform: {new}')
-        exit(0)
-        # TODO: Transform the traces so that the states match
+        print(f'Tracing {program} with arguments {prog_args}...')
+        transforms = collect_symbolic_trace(program, [program, *prog_args])
+        txl, transforms = match_traces(txl, transforms)
         result = compare_symbolic(txl, transforms)
-
-        raise NotImplementedError('The symbolic comparison algorithm is not'
-                                  ' supported yet.')
     else:
         result = compare_simple(txl, ref)
 
@@ -140,15 +137,13 @@ def main():
         print(f'For PC={hex(pc)}')
         print_separator()
 
-        txl = res['txl']
         ref = res['ref']
         for err in res['errors']:
-            reg = err['reg']
-            print(f'Content of register {reg} is possibly false.'
-                  f' Expected difference: {err["expected"]}, actual difference'
-                  f' in the translation: {err["actual"]}.\n'
-                  f'    (txl) {reg}: {hex(txl.read(reg))}\n'
-                  f'    (ref) {reg}: {hex(ref.read(reg))}')
+            print(f' - {err}')
+        if res['errors']:
+            print(ref)
+        else:
+            print('No errors found.')
 
     print()
     print('#' * 60)
author	Theofilos Augoustis <theofilos.augoustis@gmail.com>	2023-12-14 17:03:59 +0100
committer	Theofilos Augoustis <theofilos.augoustis@gmail.com>	2023-12-14 17:03:59 +0100
commit	194f3d6f2ebdc7b0631fdaaeb8451142b052ccb0 (patch)
tree	20806871433db331bdfed66ddadfadecbea2b7c4 /main.py
parent	4a5584d8f69d8ff511285387971d8cbf803f16b7 (diff)
download	focaccia-194f3d6f2ebdc7b0631fdaaeb8451142b052ccb0.tar.gz focaccia-194f3d6f2ebdc7b0631fdaaeb8451142b052ccb0.zip