diff options
Diffstat (limited to '')
| -rw-r--r-- | README.md | 2 | ||||
| -rwxr-xr-x | focaccia.py | 42 | ||||
| -rw-r--r-- | focaccia/match.py | 103 |
3 files changed, 108 insertions, 39 deletions
diff --git a/README.md b/README.md index 82918b0..5c14d91 100644 --- a/README.md +++ b/README.md @@ -65,4 +65,6 @@ and concrete parts together into "concolic" execution. - `focaccia/parser.py`: Utilities for parsing logs from Arancini and QEMU, as well as serializing/deserializing to/from our own log format. + - `focaccia/match.py`: Algorithms for trace matching. + - `miasm_test.py`: A test script that traces a program concolically. diff --git a/focaccia.py b/focaccia.py index 032615b..bf5a9ff 100755 --- a/focaccia.py +++ b/focaccia.py @@ -7,9 +7,10 @@ from typing import Iterable from focaccia.arch import supported_architectures from focaccia.compare import compare_simple, compare_symbolic, ErrorTypes from focaccia.lldb_target import LLDBConcreteTarget +from focaccia.match import fold_traces from focaccia.parser import parse_arancini from focaccia.snapshot import ProgramState -from focaccia.symbolic import SymbolicTransform, collect_symbolic_trace +from focaccia.symbolic import collect_symbolic_trace from focaccia.utils import print_result verbosity = { @@ -42,43 +43,6 @@ def collect_concrete_trace(oracle_program: str, breakpoints: Iterable[int]) \ return snapshots -def match_traces(test: list[ProgramState], truth: list[SymbolicTransform]): - if not test or not truth: - return [], [] - - assert(test[0].read_register('pc') == truth[0].addr) - - def index(seq, target, access=lambda el: el): - for i, el in enumerate(seq): - if access(el) == target: - return i - return None - - i = 0 - for next_state in test[1:]: - next_pc = next_state.read_register('pc') - index_in_truth = index(truth[i:], next_pc, lambda el: el.range[1]) - - # If no next element (i.e. no foldable range) is found in the truth - # trace, assume that the test trace contains excess states. Remove one - # and try again. This might skip testing some states, but covers more - # of the entire trace. - if index_in_truth is None: - test.pop(i + 1) - continue - - # Fold the range of truth states until the next test state - for _ in range(index_in_truth): - truth[i].concat(truth.pop(i + 1)) - - assert(truth[i].range[1] == truth[i + 1].addr) - - i += 1 - if len(truth) <= i: - break - - return test, truth - def parse_arguments(): parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference') parser.add_argument('-p', '--program', @@ -139,7 +103,7 @@ def main(): if args.symbolic: print(f'Tracing {oracle} symbolically with arguments {oracle_args}...') transforms = collect_symbolic_trace(oracle, oracle_args) - test_states, transforms = match_traces(test_states, transforms) + fold_traces(test_states, transforms) result = compare_symbolic(test_states, transforms) else: # Record truth states from a concrete execution of the oracle diff --git a/focaccia/match.py b/focaccia/match.py new file mode 100644 index 0000000..4c8071f --- /dev/null +++ b/focaccia/match.py @@ -0,0 +1,103 @@ +from typing import Iterable + +from .snapshot import ProgramState +from .symbolic import SymbolicTransform + +def _find_index(seq: Iterable, target, access_seq_elem=lambda el: el): + for i, el in enumerate(seq): + if access_seq_elem(el) == target: + return i + return None + +def fold_traces(ctrace: list[ProgramState], + strace: list[SymbolicTransform]): + """Try to fold a higher-granularity symbolic trace to match a lower- + granularity concrete trace. + + Modifies the inputs in-place. + + :param ctrace: A concrete trace. Is assumed to have lower granularity than + `truth`. + :param strace: A symbolic trace. Is assumed to have higher granularity than + `test`. We assume that because we control the symbolic trace + generation algorithm, and it produces traces on the level of + single instructions, which is the highest granularity + possible. + """ + if not ctrace or not strace: + return [], [] + + assert(ctrace[0].read_register('pc') == strace[0].addr) + + i = 0 + for next_state in ctrace[1:]: + next_pc = next_state.read_register('pc') + index_in_truth = _find_index(strace[i:], next_pc, lambda el: el.range[1]) + + # If no next element (i.e. no foldable range) is found in the truth + # trace, assume that the test trace contains excess states. Remove one + # and try again. This might skip testing some states, but covers more + # of the entire trace. + if index_in_truth is None: + ctrace.pop(i + 1) + continue + + # Fold the range of truth states until the next test state + for _ in range(index_in_truth): + strace[i].concat(strace.pop(i + 1)) + + i += 1 + if len(strace) <= i: + break + + # Fold remaining symbolic transforms into one + while i + 1 < len(strace): + strace[i].concat(strace.pop(i + 1)) + +def match_traces(ctrace: list[ProgramState], \ + strace: list[SymbolicTransform]): + """Try to match traces that don't follow the same program flow. + + This algorithm is useful if traces of the same binary mismatch due to + differences in environment during their recording. + + Does not modify the arguments. Creates and returns new lists. + + :param test: A concrete trace. + :param truth: A symbolic trace. + + :return: The modified traces. + """ + if not strace: + return [], [] + + states = [] + matched_transforms = [] + + state_iter = iter(ctrace) + symb_i = 0 + for cur_state in state_iter: + pc = cur_state.read_register('pc') + + if pc != strace[symb_i].addr: + next_i = _find_index(strace[symb_i+1:], pc, lambda t: t.addr) + + # Drop the concrete state if no address in the symbolic trace + # matches + if next_i is None: + continue + + # Otherwise, jump to the next matching symbolic state + symb_i += next_i + 1 + + # Append the now matching state/transform pair to the traces + assert(cur_state.read_register('pc') == strace[symb_i].addr) + states.append(cur_state) + matched_transforms.append(strace[symb_i]) + + # Step forward + symb_i += 1 + + assert(len(states) == len(matched_transforms)) + + return states, matched_transforms |