Refactor comparison and user-facing logic

Co-authored-by: Theofilos Augoustis <theofilos.augoustis@gmail.com> Co-authored-by: Nicola Crivellin <nicola.crivellin98@gmail.com>
author: Theofilos Augoustis <theofilos.augoustis@gmail.com> 2024-01-24 12:25:23 +0100
committer: Theofilos Augoustis <theofilos.augoustis@gmail.com> 2024-01-24 12:25:23 +0100
commit: a331aa90709fb26b795cdb649b52b067dc8f8af6 (patch)
tree: 316a83cccf5a59c0245009cb21aa87f408053b84
parent: c8bb7528b8dc053b45d8c652663479f327273ef8 (diff)
download: focaccia-a331aa90709fb26b795cdb649b52b067dc8f8af6.tar.gz
focaccia-a331aa90709fb26b795cdb649b52b067dc8f8af6.zip
5 files changed, 55 insertions, 102 deletions
diff --git a/focaccia.py b/focaccia.py
index bbd1317..032615b 100755
--- a/focaccia.py
+++ b/focaccia.py
@@ -1,19 +1,25 @@
-#! /bin/python3
+#! /usr/bin/env python3
 
 import argparse
 import platform
 from typing import Iterable
 
-from focaccia.arch import x86
-from focaccia.compare import compare_simple, compare_symbolic, \
-                             ErrorSeverity, ErrorTypes
+from focaccia.arch import supported_architectures
+from focaccia.compare import compare_simple, compare_symbolic, ErrorTypes
 from focaccia.lldb_target import LLDBConcreteTarget
 from focaccia.parser import parse_arancini
 from focaccia.snapshot import ProgramState
 from focaccia.symbolic import SymbolicTransform, collect_symbolic_trace
 from focaccia.utils import print_result
 
-def run_native_execution(oracle_program: str, breakpoints: Iterable[int]):
+verbosity = {
+    'info':    ErrorTypes.INFO,
+    'warning': ErrorTypes.POSSIBLE,
+    'error':   ErrorTypes.CONFIRMED,
+}
+
+def collect_concrete_trace(oracle_program: str, breakpoints: Iterable[int]) \
+        -> list[ProgramState]:
     """Gather snapshots from a native execution via an external debugger.
 
     :param oracle_program: Program to execute.
@@ -22,8 +28,6 @@ def run_native_execution(oracle_program: str, breakpoints: Iterable[int]):
 
     :return: A list of snapshots gathered from the execution.
     """
-    assert(platform.machine() == "x86_64")
-
     target = LLDBConcreteTarget(oracle_program)
 
     # Set breakpoints
@@ -75,19 +79,6 @@ def match_traces(test: list[ProgramState], truth: list[SymbolicTransform]):
 
     return test, truth
 
-def parse_inputs(txl_path, program):
-    # Our architecture
-    arch = x86.ArchX86()
-
-    with open(txl_path, "r") as txl_file:
-        txl = parse_arancini(txl_file, arch)
-
-    with open(txl_path, "r") as txl_file:
-        breakpoints = [state.read_register('PC') for state in txl]
-        ref = run_native_execution(program, breakpoints)
-
-    return txl, ref
-
 def parse_arguments():
     parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference')
     parser.add_argument('-p', '--program',
@@ -96,7 +87,6 @@ def parse_arguments():
                         help='Path to oracle program')
     parser.add_argument('-a', '--program-arg',
                         type=str,
-                        required=False,
                         default=[],
                         action='append',
                         help='Arguments to the program specified with --program.')
@@ -110,42 +100,52 @@ def parse_arguments():
                         help='Use an advanced algorithm that uses symbolic'
                              ' execution to determine accurate data'
                              ' transformations. This improves the quality of'
-                             ' generated errors significantly, but may take'
-                             ' more time to run.')
+                             ' generated errors significantly, but will take'
+                             ' more time to complete.')
     parser.add_argument('--error-level',
                         type=str,
-                        default='verbose',
-                        choices=['verbose', 'errors', 'restricted'],
-                        help='Verbosity of reported errors. \'errors\' reports'
-                             ' everything that might be an error in the'
-                             ' translation, while \'verbose\' may report'
-                             ' additional errors from incomplete input'
-                             ' data, etc. [Default: verbose]')
+                        default='warning',
+                        choices=list(verbosity.keys()),
+                        help='Verbosity of reported errors. \'error\' only'
+                             ' reports mismatches that have been detected as'
+                             ' errors in the translation with certainty.'
+                             ' \'warning\' will report possible errors that'
+                             ' may as well stem from incomplete input data.'
+                             ' \'info\' will report absolutely everything.'
+                             ' [Default: warning]')
     args = parser.parse_args()
     return args
 
 def main():
-    verbosity = {
-        'verbose': ErrorTypes.INFO,
-        'errors': ErrorTypes.POSSIBLE,
-        'restricted': ErrorTypes.CONFIRMED,
-    }
     args = parse_arguments()
 
+    # Determine the current machine's architecture. The log type must match the
+    # architecture on which focaccia is executed because focaccia wants to
+    # execute the reference program concretely.
+    if platform.machine() not in supported_architectures:
+        print(f'Machine {platform.machine()} is not supported! Exiting.')
+        exit(1)
+    arch = supported_architectures[platform.machine()]
+
     txl_path = args.txl
-    program = args.program
-    prog_args = args.program_arg
-    txl, ref = parse_inputs(txl_path, program)
+    oracle = args.program
+    oracle_args = args.program_arg
 
-    if args.symbolic:
-        assert(program is not None)
+    # Parse reference trace
+    with open(txl_path, "r") as txl_file:
+        test_states = parse_arancini(txl_file, arch)
 
-        print(f'Tracing {program} symbolically with arguments {prog_args}...')
-        transforms = collect_symbolic_trace(program, prog_args)
-        txl, transforms = match_traces(txl, transforms)
-        result = compare_symbolic(txl, transforms)
+    # Compare reference trace to a truth
+    if args.symbolic:
+        print(f'Tracing {oracle} symbolically with arguments {oracle_args}...')
+        transforms = collect_symbolic_trace(oracle, oracle_args)
+        test_states, transforms = match_traces(test_states, transforms)
+        result = compare_symbolic(test_states, transforms)
     else:
-        result = compare_simple(txl, ref)
+        # Record truth states from a concrete execution of the oracle
+        breakpoints = [state.read_register('PC') for state in test_states]
+        truth = collect_concrete_trace(oracle, breakpoints)
+        result = compare_simple(test_states, truth)
 
     print_result(result, verbosity[args.error_level])
 
diff --git a/focaccia/compare.py b/focaccia/compare.py
index 36cd54e..d89a41a 100644
--- a/focaccia/compare.py
+++ b/focaccia/compare.py
@@ -57,9 +57,9 @@ def _calc_transformation(previous: ProgramState, current: ProgramState):
     transformation = ProgramState(arch)
     for reg in arch.regnames:
         try:
-            prev_val, cur_val = previous.read_register(reg), current.read_register(reg)
-            if prev_val is not None and cur_val is not None:
-                transformation.set_register(reg, cur_val - prev_val)
+            prev_val = previous.read_register(reg)
+            cur_val = current.read_register(reg)
+            transformation.set_register(reg, cur_val - prev_val)
         except RegisterAccessError:
             # Register is not set in either state
             pass
@@ -138,12 +138,8 @@ def compare_simple(test_states: list[ProgramState],
         pc_txl = txl.read_register(PC_REGNAME)
         pc_truth = truth.read_register(PC_REGNAME)
 
-        # The program counter should always be set on a snapshot
-        assert(pc_truth is not None)
-        assert(pc_txl is not None)
-
         if pc_txl != pc_truth:
-            print(f'Unmatched program counter {hex(txl.read_register(PC_REGNAME))}'
+            print(f'Unmatched program counter {hex(pc_txl)}'
                   f' in translated code!')
             continue
 
@@ -176,7 +172,7 @@ def _find_register_errors(txl_from: ProgramState,
     except MemoryAccessError as err:
         s, e = transform_truth.range
         return [Error(
-            ErrorTypes.INCOMPLETE,
+            ErrorTypes.POSSIBLE,
             f'Register transformations {hex(s)} -> {hex(e)} depend on'
             f' {err.mem_size} bytes at memory address {hex(err.mem_addr)}'
             f' that are not entirely present in the tested state'
@@ -280,15 +276,9 @@ def _find_errors_symbolic(txl_from: ProgramState,
     :param transform_truth: The symbolic transformation that maps the source
                             state to the destination state.
     """
-    if (txl_from.read_register('PC') != transform_truth.range[0]) \
-            or (txl_to.read_register('PC') != transform_truth.range[1]):
-        tstart, tend = transform_truth.range
-        return [Error(ErrorTypes.POSSIBLE,
-                      f'Program counters of the tested transformation'
-                      f' do not match the truth transformation:'
-                      f' {hex(txl_from.read_register("PC"))} -> {hex(txl_to.read_register("PC"))}'
-                      f' (test) vs. {hex(tstart)} -> {hex(tend)} (truth).'
-                      f' Skipping with no errors.')]
+    from_pc = txl_from.read_register('PC')
+    to_pc = txl_to.read_register('PC')
+    assert((from_pc, to_pc) == transform_truth.range)
 
     errors = []
     errors.extend(_find_register_errors(txl_from, txl_to, transform_truth))
diff --git a/focaccia/lldb_target.py b/focaccia/lldb_target.py
index 05ab66d..903e73d 100644
--- a/focaccia/lldb_target.py
+++ b/focaccia/lldb_target.py
@@ -178,7 +178,7 @@ class LLDBConcreteTarget:
         if not err.success:
             raise ConcreteMemoryError(f'Error when reading {size} bytes at'
                                       f' address {hex(addr)}: {err}')
-        return content
+        return bytes(reversed(content))  # Convert to big endian
 
     def write_memory(self, addr, value: bytes):
         """Write bytes to memory.
diff --git a/focaccia/miasm_util.py b/focaccia/miasm_util.py
index 514390d..f43c151 100644
--- a/focaccia/miasm_util.py
+++ b/focaccia/miasm_util.py
@@ -131,7 +131,7 @@ def _eval_exprmem(expr: ExprMem, state: MiasmConcreteState):
         return expr
 
     assert(len(mem) * 8 == expr.size)
-    return ExprInt(int.from_bytes(mem, byteorder='little'), expr.size)
+    return ExprInt(int.from_bytes(mem), expr.size)
 
 def _eval_exprcond(expr, state: MiasmConcreteState):
     """Evaluate an ExprCond using the current state"""
diff --git a/miasm_test.py b/miasm_test.py
deleted file mode 100644
index 8d5bd9a..0000000
--- a/miasm_test.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import argparse
-
-from focaccia.symbolic import collect_symbolic_trace
-
-def main():
-    program = argparse.ArgumentParser()
-    program.add_argument('binary')
-    program.add_argument('argv', action='store', nargs=argparse.REMAINDER)
-    program.add_argument('--start-addr',
-                         help='Instruction at which to start')
-    args = program.parse_args()
-
-    binary = args.binary
-    argv = args.argv
-
-    pc = None
-    if args.start_addr:
-        try:
-            pc = int(args.start_addr, 16)
-        except ValueError:
-            print(f'Start address must be a hexadecimal number. Exiting.')
-            exit(1)
-
-    strace = collect_symbolic_trace(binary, argv, pc)
-
-    print(f'--- {len(strace)} instructions traced.')
-    print(f'--- No new PC found. Exiting.')
-
-if __name__ == "__main__":
-    main()
-
-# TODO: To implement support for unimplemented instructions, add their
-# ASM->IR implementations to the `mnemo_func` array in
-# `miasm/arch/x86/sem.py:5142`.
-#
-# For XGETBV, I might have to add the extended control register XCR0 first.
-# This might be a nontrivial patch to Miasm.
author	Theofilos Augoustis <theofilos.augoustis@gmail.com>	2024-01-24 12:25:23 +0100
committer	Theofilos Augoustis <theofilos.augoustis@gmail.com>	2024-01-24 12:25:23 +0100
commit	a331aa90709fb26b795cdb649b52b067dc8f8af6 (patch)
tree	316a83cccf5a59c0245009cb21aa87f408053b84
parent	c8bb7528b8dc053b45d8c652663479f327273ef8 (diff)
download	focaccia-a331aa90709fb26b795cdb649b52b067dc8f8af6.tar.gz focaccia-a331aa90709fb26b795cdb649b52b067dc8f8af6.zip