about summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
authorTheofilos Augoustis <theofilos.augoustis@gmail.com>2024-02-19 16:26:22 +0100
committerTheofilos Augoustis <theofilos.augoustis@gmail.com>2024-02-19 16:26:22 +0100
commit67dbe17ec3dff6f4c9a508ace3f1d25cb5e9f9c8 (patch)
tree5f3281228e39c072daee10dd6aea2267e4612f57 /tools
parent86b42b57d322837c18a47daa917a597c79ad2bbb (diff)
downloadfocaccia-67dbe17ec3dff6f4c9a508ace3f1d25cb5e9f9c8.tar.gz
focaccia-67dbe17ec3dff6f4c9a508ace3f1d25cb5e9f9c8.zip
Make symbolic equations more symbolic
Reduce the impact of concrete guidance on the process of calculating an
instruction's symbolic equation. The resulting equations will contain
less assumptions about the concrete state and thus be more generic.
Diffstat (limited to 'tools')
-rw-r--r--tools/_qemu_tool.py158
-rwxr-xr-xtools/capture_transforms.py7
2 files changed, 90 insertions, 75 deletions
diff --git a/tools/_qemu_tool.py b/tools/_qemu_tool.py
index 9659f0e..38715dc 100644
--- a/tools/_qemu_tool.py
+++ b/tools/_qemu_tool.py
@@ -15,7 +15,8 @@ from focaccia.compare import compare_symbolic
 from focaccia.snapshot import ProgramState, ReadableProgramState, \
                               RegisterAccessError, MemoryAccessError
 from focaccia.symbolic import SymbolicTransform, eval_symbol, ExprMem
-from focaccia.utils import print_result
+from focaccia.trace import Trace, TraceEnvironment
+from focaccia.utils import get_envp, print_result
 
 from verify_qemu import make_argparser, verbosity
 
@@ -76,6 +77,7 @@ class GDBServerStateIterator:
             exit(1)
 
         self.arch = supported_architectures[archname]
+        self.binary = self._process.progspace.filename
 
     def __iter__(self):
         return self
@@ -98,6 +100,77 @@ class GDBServerStateIterator:
 
         return GDBProgramState(self._process, gdb.selected_frame())
 
+def record_minimal_snapshot(prev_state: ReadableProgramState,
+                            cur_state: ReadableProgramState,
+                            prev_transform: SymbolicTransform,
+                            cur_transform: SymbolicTransform) \
+        -> ProgramState:
+    """Record a minimal snapshot.
+
+    A minimal snapshot must include values (registers and memory) that are
+    accessed by two transformations:
+      1. The values produced by the previous transformation (the
+         transformation that is producing this snapshot) to check these
+         values against expected values calculated from the previous
+         program state.
+      2. The values that act as inputs to the transformation acting on this
+         snapshot, to calculate the expected values of the next snapshot.
+
+    :param prev_transform: The symbolic transformation generating, or
+                           leading to, `cur_state`. Values generated by
+                           this transformation are included in the
+                           snapshot.
+    :param transform: The symbolic transformation operating on this
+                      snapshot. Input values to this transformation are
+                      included in the snapshot.
+    """
+    assert(cur_state.read_register('pc') == cur_transform.addr)
+    assert(prev_transform.arch == cur_transform.arch)
+
+    def get_written_addresses(t: SymbolicTransform):
+        """Get all output memory accesses of a symbolic transformation."""
+        return [ExprMem(a, v.size) for a, v in t.changed_mem.items()]
+
+    def set_values(regs: Iterable[str], mems: Iterable[ExprMem],
+                   cur_state: ReadableProgramState,
+                   prev_state: ReadableProgramState,
+                   out_state: ProgramState):
+        """
+        :param prev_state: Addresses of memory included in the snapshot are
+                           resolved relative to this state.
+        """
+        for regname in regs:
+            regval = cur_state.read_register(regname)
+            try:
+                out_state.set_register(regname, regval)
+            except RegisterAccessError:
+                pass
+        for mem in mems:
+            assert(mem.size % 8 == 0)
+            addr = eval_symbol(mem.ptr, prev_state)
+            try:
+                mem = cur_state.read_memory(addr, int(mem.size / 8))
+                out_state.write_memory(addr, mem)
+            except MemoryAccessError:
+                pass
+
+    state = ProgramState(cur_transform.arch)
+    state.set_register('PC', cur_transform.addr)
+
+    set_values(prev_transform.changed_regs.keys(),
+               get_written_addresses(prev_transform),
+               cur_state,
+               prev_state,  # Evaluate memory addresses based on previous
+                            # state because they are that state's output
+                            # addresses.
+               state)
+    set_values(cur_transform.get_used_registers(),
+               cur_transform.get_used_memory_addresses(),
+               cur_state,
+               cur_state,
+               state)
+    return state
+
 def collect_conc_trace(gdb: GDBServerStateIterator, \
                        strace: list[SymbolicTransform]) \
         -> tuple[list[ProgramState], list[SymbolicTransform]]:
@@ -115,75 +188,6 @@ def collect_conc_trace(gdb: GDBServerStateIterator, \
     :return: A list of concrete states and a list of corresponding symbolic
              transformations. The lists are guaranteed to have the same length.
     """
-    def record_snapshot(prev_state: ReadableProgramState,
-                        cur_state: GDBProgramState,
-                        prev_transform: SymbolicTransform,
-                        cur_transform: SymbolicTransform) \
-            -> ProgramState:
-        """Record a minimal snapshot.
-
-        A minimal snapshot must include values (registers and memory) that are
-        accessed by two transformations:
-          1. The values produced by the previous transformation (the
-             transformation that is producing this snapshot) to check these
-             values against expected values calculated from the previous
-             program state.
-          2. The values that act as inputs to the transformation acting on this
-             snapshot, to calculate the expected values of the next snapshot.
-
-        :param prev_transform: The symbolic transformation generating, or
-                               leading to, `gdb_state`. Values generated by
-                               this transformation are included in the
-                               snapshot.
-        :param transform: The symbolic transformation operating on this
-                          snapshot. Input values to this transformation are
-                          included in the snapshot.
-        """
-        assert(cur_state.read_register('pc') == cur_transform.addr)
-
-        def get_written_addresses(t: SymbolicTransform):
-            """Get all output memory accesses of a symbolic transformation."""
-            return [ExprMem(a, v.size) for a, v in t.changed_mem.items()]
-
-        def set_values(regs: Iterable[str], mems: Iterable[ExprMem],
-                       cur_state: GDBProgramState, prev_state: ReadableProgramState,
-                       out_state: ProgramState):
-            """
-            :param prev_state: Addresses of memory included in the snapshot are
-                               resolved relative to this state.
-            """
-            for regname in regs:
-                regval = cur_state.read_register(regname)
-                try:
-                    out_state.set_register(regname, regval)
-                except RegisterAccessError:
-                    pass
-            for mem in mems:
-                assert(mem.size % 8 == 0)
-                addr = eval_symbol(mem.ptr, prev_state)
-                try:
-                    mem = cur_state.read_memory(addr, int(mem.size / 8))
-                    out_state.write_memory(addr, mem)
-                except MemoryAccessError:
-                    pass
-
-        state = ProgramState(gdb.arch)
-        state.set_register('PC', cur_transform.addr)
-
-        set_values(prev_transform.changed_regs.keys(),
-                   get_written_addresses(prev_transform),
-                   cur_state,
-                   prev_state,  # Evaluate memory addresses based on previous
-                                # state because they are that state's output
-                                # addresses.
-                   state)
-        set_values(cur_transform.get_used_registers(),
-                   cur_transform.get_used_memory_addresses(),
-                   cur_state,
-                   cur_state,
-                   state)
-        return state
-
     def find_index(seq, target, access=lambda el: el):
         for i, el in enumerate(seq):
             if access(el) == target:
@@ -222,7 +226,7 @@ def collect_conc_trace(gdb: GDBServerStateIterator, \
                 symb_i += next_i + 1
 
             assert(cur_state.read_register('pc') == strace[symb_i].addr)
-            states.append(record_snapshot(
+            states.append(record_minimal_snapshot(
                 states[-1] if states else cur_state,
                 cur_state,
                 matched_transforms[-1] if matched_transforms else strace[symb_i],
@@ -240,6 +244,12 @@ def main():
 
     gdbserver_addr = 'localhost'
     gdbserver_port = args.port
+    gdb_server = GDBServerStateIterator(gdbserver_addr, gdbserver_port)
+
+    executable = gdb_server.binary
+    argv = []  # QEMU's GDB stub does not support 'info proc cmdline'
+    envp = []  # Can't get the remote target's environment
+    env = TraceEnvironment(executable, argv, envp, '?')
 
     # Read pre-computed symbolic trace
     with open(args.symb_trace, 'r') as strace:
@@ -247,8 +257,8 @@ def main():
 
     # Use symbolic trace to collect concrete trace from QEMU
     conc_states, matched_transforms = collect_conc_trace(
-        GDBServerStateIterator(gdbserver_addr, gdbserver_port),
-        symb_transforms)
+        gdb_server,
+        symb_transforms.states)
 
     # Verify and print result
     if not args.quiet:
@@ -258,7 +268,7 @@ def main():
     if args.output:
         from focaccia.parser import serialize_snapshots
         with open(args.output, 'w') as file:
-            serialize_snapshots(conc_states, file)
+            serialize_snapshots(Trace(conc_states, env), file)
 
 if __name__ == "__main__":
     main()
diff --git a/tools/capture_transforms.py b/tools/capture_transforms.py
index 5439b05..5a104c0 100755
--- a/tools/capture_transforms.py
+++ b/tools/capture_transforms.py
@@ -2,9 +2,11 @@
 
 import argparse
 import logging
+import os
 
 from focaccia import parser
 from focaccia.symbolic import collect_symbolic_trace
+from focaccia.trace import Trace, TraceEnvironment
 
 def main():
     prog = argparse.ArgumentParser()
@@ -19,7 +21,10 @@ def main():
     args = prog.parse_args()
 
     logging.disable(logging.CRITICAL)
-    trace = collect_symbolic_trace(args.binary, args.args, None)
+    env = TraceEnvironment(args.binary,
+                           args.args,
+                           [f'{k}={v}' for k, v in os.environ.items()])
+    trace = collect_symbolic_trace(env, None)
     with open(args.output, 'w') as file:
         parser.serialize_transformations(trace, file)