diff options
Diffstat (limited to 'symbolic.py')
| -rw-r--r-- | symbolic.py | 96 |
1 files changed, 83 insertions, 13 deletions
diff --git a/symbolic.py b/symbolic.py index a5e1f70..2a328fd 100644 --- a/symbolic.py +++ b/symbolic.py @@ -1,21 +1,33 @@ """Tools and utilities for symbolic execution with Miasm.""" +from __future__ import annotations +from typing import Self + from miasm.analysis.binary import ContainerELF from miasm.analysis.machine import Machine from miasm.core.asmblock import AsmCFG from miasm.core.locationdb import LocationDB from miasm.ir.symbexec import SymbolicExecutionEngine -from miasm.expression.expression import Expr, ExprId, ExprMem +from miasm.expression.expression import Expr, ExprId, ExprMem, ExprInt from lldb_target import LLDBConcreteTarget, record_snapshot from miasm_util import MiasmConcreteState, eval_expr from snapshot import ProgramState +from arch import Arch, supported_architectures class SymbolicTransform: def __init__(self, from_addr: int, to_addr: int): self.addr = from_addr self.range = (from_addr, to_addr) + def concat(self, other: Self) -> Self: + """Concatenate another transform to this transform. + + The symbolic transform on which `concat` is called is the transform + that is applied first, meaning: `(a.concat(b))(state) == b(a(state))`. + """ + raise NotImplementedError('concat is abstract.') + def calc_register_transform(self, conc_state: ProgramState) \ -> dict[str, int]: raise NotImplementedError('calc_register_transform is abstract.') @@ -27,6 +39,7 @@ class SymbolicTransform: class MiasmSymbolicTransform(SymbolicTransform): def __init__(self, transform: dict[ExprId, Expr], + arch: Arch, loc_db: LocationDB, start_addr: int, end_addr: int): @@ -41,22 +54,67 @@ class MiasmSymbolicTransform(SymbolicTransform): self.regs_diff: dict[str, Expr] = {} self.mem_diff: dict[ExprMem, Expr] = {} - for id, expr in transform.items(): - if isinstance(id, ExprMem): - self.mem_diff[id.ptr] = expr - elif id.name != 'IRDst': - assert(isinstance(id, ExprId)) - self.regs_diff[id.name] = expr - + for dst, expr in transform.items(): + if isinstance(dst, ExprMem): + self.mem_diff[dst] = expr + else: + assert(isinstance(dst, ExprId)) + regname = arch.to_regname(dst.name) + if regname is not None: + self.regs_diff[regname] = expr + + self.arch = arch self.loc_db = loc_db + def concat(self, other: MiasmSymbolicTransform) -> Self: + class MiasmSymbolicState: + """Drop-in replacement for MiasmConcreteState in eval_expr that + returns the current transform's symbolic equations instead of + symbolic values. Calling eval_expr with this effectively nests the + transformation into the concatenated transformation. + """ + def __init__(self, transform: MiasmSymbolicTransform): + self.transform = transform + + def resolve_register(self, regname: str): + return self.transform.regs_diff.get(regname, None) + + def resolve_memory(self, addr: int, size: int): + mem = ExprMem(ExprInt(addr, 64), size) + return self.transform.mem_diff.get(mem, None) + + def resolve_location(self, _): + return None + + if self.range[1] != other.range[0]: + raise ValueError(f'The concatenated transformations must span a' + f' contiguous range of instructions.') + + ref_state = MiasmSymbolicState(self) + for reg, expr in other.regs_diff.items(): + if reg not in self.regs_diff: + self.regs_diff[reg] = expr + else: + self.regs_diff[reg] = eval_expr(expr, ref_state) + + for dst, expr in other.mem_diff.items(): + dst = eval_expr(dst, ref_state) + if dst not in self.mem_diff: + self.mem_diff[dst] = expr + else: + self.mem_diff[dst] = eval_expr(expr, ref_state) + + self.range = (self.range[0], other.range[1]) + + return self + def calc_register_transform(self, conc_state: ProgramState) \ -> dict[str, int]: ref_state = MiasmConcreteState(conc_state, self.loc_db) res = {} for regname, expr in self.regs_diff.items(): - res[regname] = eval_expr(expr, ref_state) + res[regname] = int(eval_expr(expr, ref_state)) return res def calc_memory_transform(self, conc_state: ProgramState) \ @@ -71,8 +129,14 @@ class MiasmSymbolicTransform(SymbolicTransform): return res def __repr__(self) -> str: - return f'Symbolic state transformation for instruction \ - {hex(self.addr)}.' + start, end = self.range + res = f'Symbolic state transformation {hex(start)} -> {hex(end)}:\n' + for reg, expr in self.regs_diff.items(): + res += f' {reg:6s} = {expr}\n' + for mem, expr in self.mem_diff.items(): + res += f' {mem} = {expr}\n' + + return res def _step_until(target: LLDBConcreteTarget, addr: int) -> list[int]: """Step a concrete target to a specific instruction. @@ -181,6 +245,12 @@ def collect_symbolic_trace(binary: str, cont = ContainerELF.from_stream(bin_file, loc_db) machine = Machine(cont.arch) + # Find corresponding architecture + if machine.name not in supported_architectures: + print(f'[ERROR] {machine.name} is not supported. Returning.') + return [] + arch = supported_architectures[machine.name] + # Create disassembly/lifting context mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db) mdis.follow_call = True @@ -243,8 +313,8 @@ def collect_symbolic_trace(binary: str, res = [] for (start, diff), (end, _) in zip(symb_trace[:-1], symb_trace[1:]): - res.append(MiasmSymbolicTransform(diff, loc_db, start, end)) + res.append(MiasmSymbolicTransform(diff, arch, loc_db, start, end)) start, diff = symb_trace[-1] - res.append(MiasmSymbolicTransform(diff, loc_db, start, start)) + res.append(MiasmSymbolicTransform(diff, arch, loc_db, start, start)) return res |