about summary refs log tree commit diff stats
path: root/src/focaccia/miasm_util.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/focaccia/miasm_util.py')
-rw-r--r--src/focaccia/miasm_util.py253
1 files changed, 253 insertions, 0 deletions
diff --git a/src/focaccia/miasm_util.py b/src/focaccia/miasm_util.py
new file mode 100644
index 0000000..a2cd025
--- /dev/null
+++ b/src/focaccia/miasm_util.py
@@ -0,0 +1,253 @@
+from typing import Callable
+
+from miasm.analysis.machine import Machine
+from miasm.core.locationdb import LocationDB, LocKey
+from miasm.expression.expression import Expr, ExprOp, ExprId, ExprLoc, \
+                                        ExprInt, ExprMem, ExprCompose, \
+                                        ExprSlice, ExprCond
+from miasm.expression.simplifications import expr_simp_explicit
+
+from . import arch
+from .arch import Arch
+from .snapshot import ReadableProgramState, \
+                      RegisterAccessError, MemoryAccessError
+
+def make_machine(_arch: Arch) -> Machine:
+    """Create a Miasm `Machine` object corresponding to an `Arch`."""
+    machines = {
+        arch.x86.archname: lambda _: Machine('x86_64'),
+        # Miasm only has ARM machine names with the l/b suffix:
+        arch.aarch64.archname: lambda a: Machine(f'aarch64{a.endianness[0]}'),
+    }
+    return machines[_arch.archname](_arch)
+
+def simp_segm(expr_simp, expr: ExprOp):
+    """Simplify a segmentation expression to an addition of the segment
+    register's base value and the address argument.
+    """
+    import miasm.arch.x86.regs as regs
+
+    base_regs = {
+        regs.FS: ExprId('fs_base', 64),
+        regs.GS: ExprId('gs_base', 64),
+    }
+
+    if expr.op == 'segm':
+        segm, addr = expr.args
+        assert(segm == regs.FS or segm == regs.GS)
+        return expr_simp(base_regs[segm] + addr)
+    return expr
+
+def simp_fadd(expr_simp, expr: ExprOp):
+    from .utils import float_bits_to_uint, uint_bits_to_float, \
+                       double_bits_to_uint, uint_bits_to_double
+
+    if expr.op != 'fadd':
+        return expr
+
+    assert(len(expr.args) == 2)
+    lhs, rhs = expr.args
+    if lhs.is_int() and rhs.is_int():
+        assert(lhs.size == rhs.size)
+        if lhs.size == 32:
+            uint_to_float = uint_bits_to_float
+            float_to_uint = float_bits_to_uint
+        elif lhs.size == 64:
+            uint_to_float = uint_bits_to_double
+            float_to_uint = double_bits_to_uint
+        else:
+            raise NotImplementedError('fadd on values of size not in {32, 64}')
+
+        res = float_to_uint(uint_to_float(lhs.arg) + uint_to_float(rhs.arg))
+        return expr_simp(ExprInt(res, expr.size))
+    return expr
+
+# The expression simplifier used in this module
+expr_simp = expr_simp_explicit
+expr_simp.enable_passes({
+    ExprOp: [simp_segm, simp_fadd],
+})
+
+class MiasmSymbolResolver:
+    """Resolves atomic symbols to some state."""
+
+    miasm_flag_aliases = {
+        arch.x86.archname: {
+            'NF':     'SF',
+            'I_F':    'IF',
+            'IOPL_F': 'IOPL',
+            'I_D':    'ID',
+        },
+        arch.aarch64.archname: {
+            'NF': 'N',
+            'SF': 'N',
+            'ZF': 'Z',
+            'CF': 'C',
+            'VF': 'V',
+            'OF': 'V',
+            'QF': 'Q',
+
+            'AF': 'A',
+            'EF': 'E',
+            'IF': 'I',
+            'FF': 'F',
+        }
+    }
+
+    def __init__(self,
+                 state: ReadableProgramState,
+                 loc_db: LocationDB):
+        self._state = state
+        self._loc_db = loc_db
+        self._arch = state.arch
+        self.endianness: Arch.Endianness = self._arch.endianness
+
+    def _miasm_to_regname(self, regname: str) -> str:
+        """Convert a register name as used by Miasm to one that follows
+        Focaccia's naming conventions."""
+        regname = regname.upper()
+        if self._arch.archname in self.miasm_flag_aliases:
+            aliases = self.miasm_flag_aliases[self._arch.archname]
+            return aliases.get(regname, regname)
+        return regname
+
+    def resolve_register(self, regname: str) -> int | None:
+        try:
+            return self._state.read_register(self._miasm_to_regname(regname))
+        except RegisterAccessError as err:
+            print(f'Not a register: {regname} ({err})')
+            return None
+
+    def resolve_memory(self, addr: int, size: int) -> bytes | None:
+        try:
+            return self._state.read_memory(addr, size)
+        except MemoryAccessError:
+            return None
+
+    def resolve_location(self, loc: LocKey) -> int | None:
+        return self._loc_db.get_location_offset(loc)
+
+def eval_expr(expr: Expr, conc_state: MiasmSymbolResolver) -> Expr:
+    """Evaluate a symbolic expression with regard to a concrete reference
+    state.
+
+    :param expr:       An expression to evaluate.
+    :param conc_state: The concrete reference state from which symbolic
+                       register and memory state is resolved.
+
+    :return: The most simplified and concrete representation of `expr` that
+             is producible with the values from `conc_state`. Is guaranteed to
+             be either an `ExprInt` or an `ExprLoc` *if* `conc_state` only
+             returns concrete register- and memory values.
+    """
+    # Most of these implementation are just copy-pasted members of
+    # `SymbolicExecutionEngine`.
+    expr_to_visitor: dict[type[Expr], Callable] = {
+        ExprInt:     _eval_exprint,
+        ExprId:      _eval_exprid,
+        ExprLoc:     _eval_exprloc,
+        ExprMem:     _eval_exprmem,
+        ExprSlice:   _eval_exprslice,
+        ExprCond:    _eval_exprcond,
+        ExprOp:      _eval_exprop,
+        ExprCompose: _eval_exprcompose,
+    }
+
+    visitor = expr_to_visitor.get(expr.__class__, None)
+    if visitor is None:
+        raise TypeError("Unknown expr type")
+
+    ret = visitor(expr, conc_state)
+    ret = expr_simp(ret)
+    assert(ret is not None)
+
+    return ret
+
+def _eval_exprint(expr: ExprInt, _):
+    """Evaluate an ExprInt using the current state"""
+    return expr
+
+def _eval_exprid(expr: ExprId, state: MiasmSymbolResolver):
+    """Evaluate an ExprId using the current state"""
+    val = state.resolve_register(expr.name)
+    if val is None:
+        return expr
+    if isinstance(val, int):
+        return ExprInt(val, expr.size)
+    return val
+
+def _eval_exprloc(expr: ExprLoc, state: MiasmSymbolResolver):
+    """Evaluate an ExprLoc using the current state"""
+    offset = state.resolve_location(expr.loc_key)
+    if offset is None:
+        return expr
+    return ExprInt(offset, expr.size)
+
+def _eval_exprmem(expr: ExprMem, state: MiasmSymbolResolver):
+    """Evaluate an ExprMem using the current state.
+    This function first evaluates the memory pointer value.
+    """
+    assert(expr.size % 8 == 0)
+
+    addr = eval_expr(expr.ptr, state)
+    if not addr.is_int():
+        return expr
+
+    assert(isinstance(addr, ExprInt))
+    mem = state.resolve_memory(int(addr), expr.size // 8)
+    if mem is None:
+        return expr
+
+    assert(len(mem) * 8 == expr.size)
+    return ExprInt(int.from_bytes(mem, byteorder=state.endianness), expr.size)
+
+def _eval_exprcond(expr, state: MiasmSymbolResolver):
+    """Evaluate an ExprCond using the current state"""
+    cond = eval_expr(expr.cond, state)
+    src1 = eval_expr(expr.src1, state)
+    src2 = eval_expr(expr.src2, state)
+    return ExprCond(cond, src1, src2)
+
+def _eval_exprslice(expr, state: MiasmSymbolResolver):
+    """Evaluate an ExprSlice using the current state"""
+    arg = eval_expr(expr.arg, state)
+    return ExprSlice(arg, expr.start, expr.stop)
+
+def _eval_cpuid(rax: ExprInt, out_reg: ExprInt):
+    """Evaluate the `x86_cpuid` operator by performing a real invocation of
+    the CPUID instruction.
+
+    :param rax:     The current value of RAX. Must be concrete.
+    :param out_reg: An index in `[0, 4)` signaling which register's value
+                    shall be returned. Must be concrete.
+    """
+    from cpuid import cpuid
+
+    regs = cpuid.CPUID()(int(rax))
+
+    if int(out_reg) >= len(regs):
+        raise ValueError(f'Output register may not be {out_reg}.')
+    return ExprInt(regs[int(out_reg)], out_reg.size)
+
+def _eval_exprop(expr, state: MiasmSymbolResolver):
+    """Evaluate an ExprOp using the current state"""
+    args = [eval_expr(arg, state) for arg in expr.args]
+
+    # Special case: CPUID instruction
+    # Evaluate the expression to a value obtained from an an actual call to
+    # the CPUID instruction. Can't do this in an expression simplifier plugin
+    # because the arguments must be concrete.
+    if expr.op == 'x86_cpuid':
+        if args[0].is_int() and args[1].is_int():
+            assert(isinstance(args[0], ExprInt) and isinstance(args[1], ExprInt))
+            return _eval_cpuid(args[0], args[1])
+        return expr
+
+    return ExprOp(expr.op, *args)
+
+def _eval_exprcompose(expr, state: MiasmSymbolResolver):
+    """Evaluate an ExprCompose using the current state"""
+    args = []
+    for arg in expr.args:
+        args.append(eval_expr(arg, state))
+    return ExprCompose(*args)