2 files changed, 132 insertions, 64 deletions
diff --git a/focaccia.py b/focaccia.py
index 0637690..f0c6efe 100755
--- a/focaccia.py
+++ b/focaccia.py
@@ -2,20 +2,19 @@
 
 import argparse
 import platform
-from typing import Iterable, Tuple
+from typing import Callable, Iterable
 
-from focaccia.arch import supported_architectures
+import focaccia.parser as parser
+from focaccia.arch import supported_architectures, Arch
 from focaccia.compare import compare_simple, compare_symbolic, ErrorTypes
 from focaccia.lldb_target import LLDBConcreteTarget
 from focaccia.match import fold_traces, match_traces
-from focaccia.parser import parse_arancini, parse_snapshots
 from focaccia.snapshot import ProgramState
-from focaccia.symbolic import collect_symbolic_trace
-from focaccia.utils import print_result
+from focaccia.symbolic import collect_symbolic_trace, SymbolicTransform
+from focaccia.utils import print_result, get_envp
 from focaccia.reproducer import Reproducer
 from focaccia.compare import ErrorSeverity
-
-
+from focaccia.trace import Trace, TraceEnvironment
 
 verbosity = {
     'info':    ErrorTypes.INFO,
@@ -23,17 +22,34 @@ verbosity = {
     'error':   ErrorTypes.CONFIRMED,
 }
 
-def collect_concrete_trace(oracle_program: str, breakpoints: Iterable[int]) \
-        -> Tuple[list[ProgramState], list]:
+concrete_trace_parsers = {
+    'focaccia': lambda f, _: parser.parse_snapshots(f),
+    'qemu':     parser.parse_qemu,
+    'arancini': parser.parse_arancini,
+}
+
+_MatchingAlgorithm = Callable[
+    [list[ProgramState], list[SymbolicTransform]],
+    tuple[list[ProgramState], list[SymbolicTransform]]
+]
+
+matching_algorithms: dict[str, _MatchingAlgorithm] = {
+    'none':   lambda c, s: (c, s),
+    'simple': match_traces,
+    'fold':   fold_traces,
+}
+
+def collect_concrete_trace(env: TraceEnvironment, breakpoints: Iterable[int]) \
+        -> list[ProgramState]:
     """Gather snapshots from a native execution via an external debugger.
 
-    :param oracle_program: Program to execute.
+    :param env: Program to execute and the environment in which to execute it.
     :param breakpoints: List of addresses at which to break and record the
                         program's state.
 
     :return: A list of snapshots gathered from the execution.
     """
-    target = LLDBConcreteTarget(oracle_program)
+    target = LLDBConcreteTarget(env.binary_name, env.argv, env.envp)
 
     # Set breakpoints
     for address in breakpoints:
@@ -41,29 +57,56 @@ def collect_concrete_trace(oracle_program: str, breakpoints: Iterable[int]) \
 
     # Execute the native program
     snapshots = []
-    basic_blocks = []
     while not target.is_exited():
         snapshots.append(target.record_snapshot())
-        basic_blocks.append(target.get_next_basic_block())
         target.run()
 
-    return snapshots, basic_blocks
+    return snapshots
 
 def parse_arguments():
-    parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference')
-    parser.add_argument('-p', '--program',
-                        type=str,
-                        required=True,
-                        help='Path to oracle program')
-    parser.add_argument('-a', '--program-arg',
-                        type=str,
+    parser = argparse.ArgumentParser()
+    parser.description = '''An emulator tester and verifier.
+
+You can pre-record symbolic traces with `tools/capture_transforms.py`, then pass
+them to the verifier with the --oracle-trace argument.
+'''
+
+    # Specification of the symbolic truth trace
+    symb_trace = parser.add_mutually_exclusive_group(required=True)
+    symb_trace.add_argument('-p', '--oracle-program',
+                            help='A program from which a symbolic truth will be'
+                                 ' recorded.')
+    symb_trace.add_argument('-o', '--oracle-trace', '--symb-trace',
+                            help='A symbolic trace that serves as a truth state'
+                                 ' for comparison.')
+    parser.add_argument('-a', '--oracle-args',
+                        nargs='*',
                         default=[],
-                        action='append',
-                        help='Arguments to the program specified with --program.')
-    parser.add_argument('-t', '--txl',
-                        type=str,
+                        help='Arguments to the oracle program.')
+    parser.add_argument('-e', '--oracle-env',
+                        nargs='*',
+                        help='Override the oracle program\'s environment during'
+                             ' symbolic and concrete execution.')
+
+    # Specification of the concrete test trace
+    parser.add_argument('-t', '--test-trace',
                         required=True,
-                        help='Path to the translation log (gathered via Arancini)')
+                        help='The concrete test states to test against the'
+                             ' symbolic truth.')
+    parser.add_argument('--test-trace-type',
+                        default='focaccia',
+                        choices=list(concrete_trace_parsers.keys()),
+                        help='Log file format of the tested program trace.'
+                             ' [Default: focaccia]')
+
+    # Algorithm and output control
+    parser.add_argument('--match',
+                        choices=list(matching_algorithms.keys()),
+                        default='simple',
+                        help='Select an algorithm to match the test trace to'
+                             ' the truth trace. Only applicable if --symbolic'
+                             ' is enabled.'
+                             ' [Default: simple]')
     parser.add_argument('--symbolic',
                         action='store_true',
                         default=False,
@@ -73,7 +116,6 @@ def parse_arguments():
                              ' generated errors significantly, but will take'
                              ' more time to complete.')
     parser.add_argument('--error-level',
-                        type=str,
                         default='warning',
                         choices=list(verbosity.keys()),
                         help='Verbosity of reported errors. \'error\' only'
@@ -83,21 +125,19 @@ def parse_arguments():
                              ' may as well stem from incomplete input data.'
                              ' \'info\' will report absolutely everything.'
                              ' [Default: warning]')
-    parser.add_argument('-r', '--reproducer',
+    parser.add_argument('--no-verifier',
                         action='store_true',
                         default=False,
-                        help='Enable reproducer to get assembly code'
-                             ' which should replicate the first error.')
-    parser.add_argument('--trace-type',
-                        type=str,
-                        default='qemu',
-                        choices=['qemu', 'arancini'],
-                        help='Trace type of the emulator.'
-                             ' Currently only Qemu and Arancini traces are accepted.'
-                             ' Use \'qemu\' for Qemu and \'arancini\' for Arancini.'
-                             ' [Default: qemu]')
-    args = parser.parse_args()
-    return args
+                        help='Don\'t print verifier output.')
+
+    # Reproducer
+    parser.add_argument('--reproducer',
+                        action='store_true',
+                        default=False,
+                        help='Generate repoducer executables for detected'
+                             ' errors.')
+
+    return parser.parse_args()
 
 def print_reproducer(result, min_severity: ErrorSeverity, oracle, oracle_args):
     for res in result:
@@ -108,6 +148,30 @@ def print_reproducer(result, min_severity: ErrorSeverity, oracle, oracle_args):
             print(rep.asm())
             return
 
+def get_test_trace(args, arch: Arch) -> Trace[ProgramState]:
+    path = args.test_trace
+    parser = concrete_trace_parsers[args.test_trace_type]
+    with open(path, 'r') as txl_file:
+        return parser(txl_file, arch)
+
+def get_truth_env(args) -> TraceEnvironment:
+    oracle = args.oracle_program
+    oracle_args = args.oracle_args
+    if args.oracle_env:
+        oracle_env = args.oracle_env
+    else:
+        oracle_env = get_envp()
+    return TraceEnvironment(oracle, oracle_args, oracle_env)
+
+def get_symbolic_trace(args):
+    if args.oracle_program:
+        env = get_truth_env(args)
+        print('Tracing', env)
+        return collect_symbolic_trace(env)
+    elif args.oracle_trace:
+        with open(args.oracle_trace, 'r') as file:
+            return parser.parse_transformations(file)
+    raise AssertionError()
 
 def main():
     args = parse_arguments()
@@ -120,36 +184,39 @@ def main():
         exit(1)
     arch = supported_architectures[platform.machine()]
 
-    txl_path = args.txl
-    oracle = args.program
-    oracle_args = args.program_arg
-
     # Parse reference trace
-    with open(txl_path, "r") as txl_file:
-        if args.trace_type == 'qemu':
-            test_states = parse_snapshots(txl_file)
-        elif args.trace_type == 'arancini':
-            test_states = parse_arancini(txl_file, arch)
-        else:
-            test_states = parse_snapshots(txl_file)
+    test_trace = get_test_trace(args, arch)
 
     # Compare reference trace to a truth
     if args.symbolic:
-        print(f'Tracing {oracle} symbolically with arguments {oracle_args}...')
-        transforms = collect_symbolic_trace(oracle, oracle_args)
-        test_states, transforms = match_traces(test_states, transforms)
-        #fold_traces(test_states, transforms)
-        result = compare_symbolic(test_states, transforms)
+        symb_trace = get_symbolic_trace(args)
+        match = matching_algorithms[args.match]
+        conc, symb = match(test_trace.states, symb_trace.states)
+
+        result = compare_symbolic(conc, symb)
+        oracle_env = symb_trace.env
     else:
+        if not args.oracle_program:
+            print('Argument --oracle-program is required for non-symbolic'
+                  ' verification!')
+            exit(1)
+
         # Record truth states from a concrete execution of the oracle
-        breakpoints = [state.read_register('PC') for state in test_states]
-        truth = collect_concrete_trace(oracle, breakpoints)
-        result = compare_simple(test_states, truth)
+        breakpoints = [state.read_register('PC') for state in test_trace]
+        env = get_truth_env(args)
+        truth_trace = collect_concrete_trace(env, breakpoints)
+
+        result = compare_simple(test_trace.states, truth_trace)
+        oracle_env = env
 
-    print_result(result, verbosity[args.error_level])
+    if not args.no_verifier:
+        print_result(result, verbosity[args.error_level])
 
     if args.reproducer:
-        print_reproducer(result, verbosity[args.error_level], oracle, oracle_args)
+        print_reproducer(result,
+                         verbosity[args.error_level],
+                         oracle_env.binary_name,
+                         oracle_env.argv)
 
 if __name__ == '__main__':
     main()
diff --git a/focaccia/miasm_util.py b/focaccia/miasm_util.py
index 24a0e11..83a8778 100644
--- a/focaccia/miasm_util.py
+++ b/focaccia/miasm_util.py
@@ -203,9 +203,10 @@ def _eval_exprop(expr, state: MiasmSymbolResolver):
     # the CPUID instruction. Can't do this in an expression simplifier plugin
     # because the arguments must be concrete.
     if expr.op == 'x86_cpuid':
-        assert(len(args) == 2)
-        assert(isinstance(args[0], ExprInt) and isinstance(args[1], ExprInt))
-        return _eval_cpuid(args[0], args[1])
+        if args[0].is_int() and args[1].is_int():
+            assert(isinstance(args[0], ExprInt) and isinstance(args[1], ExprInt))
+            return _eval_cpuid(args[0], args[1])
+        return expr
 
     return ExprOp(expr.op, *args)