about summary refs log tree commit diff stats
path: root/focaccia/miasm_util.py
blob: 24a0e113f521a933b651ce0664fb624b5a98024c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
from typing import Callable

from miasm.core.locationdb import LocationDB, LocKey
from miasm.expression.expression import Expr, ExprOp, ExprId, ExprLoc, \
                                        ExprInt, ExprMem, ExprCompose, \
                                        ExprSlice, ExprCond
from miasm.expression.simplifications import expr_simp_explicit

from .snapshot import ReadableProgramState, \
                      RegisterAccessError, MemoryAccessError

def simp_segm(expr_simp, expr: ExprOp):
    """Rewrite a `segm` operation as `<segment base> + <address>`.

    Simplification pass for `ExprOp` nodes. Operations other than `segm` are
    returned unchanged.

    :param expr_simp: The simplifier running this pass. It is applied to the
                      rewritten expression.
    :param expr:      The operation to inspect.

    :return: The simplified sum of the segment's base register (`fs_base` or
             `gs_base`) and the address operand if `expr` is a `segm`
             operation, otherwise `expr` itself.
    """
    import miasm.arch.x86.regs as regs

    if expr.op != 'segm':
        return expr

    segment, address = expr.args
    # Only the FS and GS segment registers have a base register defined here.
    assert(segment in (regs.FS, regs.GS))

    segment_bases = {
        regs.FS: ExprId('fs_base', 64),
        regs.GS: ExprId('gs_base', 64),
    }
    return expr_simp(segment_bases[segment] + address)

def simp_fadd(expr_simp, expr: ExprOp):
    """Constant-fold an `fadd` operation whose operands are both integers.

    Simplification pass for `ExprOp` nodes. The operands' bit patterns are
    converted to floating-point values, added, and the sum is converted back
    to a bit pattern.

    :param expr_simp: The simplifier running this pass. It is applied to the
                      folded result.
    :param expr:      The operation to inspect.

    :return: An integer expression of size `expr.size` holding the bit
             pattern of the sum if `expr` is an `fadd` of two integer
             operands; otherwise `expr` itself.
    :raise NotImplementedError: If the operands are neither 32 nor 64 bits
                                wide.
    """
    from .utils import float_bits_to_uint, uint_bits_to_float, \
                       double_bits_to_uint, uint_bits_to_double

    if expr.op != 'fadd':
        return expr

    assert(len(expr.args) == 2)
    lhs, rhs = expr.args
    if not (lhs.is_int() and rhs.is_int()):
        # At least one operand is still symbolic; nothing to fold.
        return expr

    assert(lhs.size == rhs.size)

    # Operand width in bits -> (bits-to-float, float-to-bits) converters.
    converters = {
        32: (uint_bits_to_float, float_bits_to_uint),
        64: (uint_bits_to_double, double_bits_to_uint),
    }
    if lhs.size not in converters:
        raise NotImplementedError('fadd on values of size not in {32, 64}')
    decode, encode = converters[lhs.size]

    total = decode(lhs.arg) + decode(rhs.arg)
    return expr_simp(ExprInt(encode(total), expr.size))

# The expression simplifier used in this module.
# NOTE: this is a plain assignment, so `expr_simp` refers to the very same
# object as Miasm's `expr_simp_explicit`; the passes below are enabled on that
# object, not on a private copy.
expr_simp = expr_simp_explicit
# Register this module's custom `ExprOp` passes: resolution of `segm`
# operations (`simp_segm`) and constant folding of `fadd` (`simp_fadd`).
expr_simp.enable_passes({
    ExprOp: [simp_segm, simp_fadd],
})

class MiasmSymbolResolver:
    """Resolves atomic symbols (registers, memory, locations) to values.

    Register and memory values are read from a program state; location keys
    are looked up in a Miasm location database. Every `resolve_*` method
    returns `None` when the requested value is not available.
    """

    # Flag names as spelled by Miasm, mapped to the names Focaccia uses.
    miasm_flag_aliases = {
        'NF':     'SF',
        'I_F':    'IF',
        'IOPL_F': 'IOPL',
        'I_D':    'ID',
    }

    def __init__(self, state: ReadableProgramState, loc_db: LocationDB):
        """
        :param state:  The program state that registers and memory are read
                       from.
        :param loc_db: The location database used to resolve location keys.
        """
        self._loc_db = loc_db
        self._state = state

    @staticmethod
    def _miasm_to_regname(regname: str) -> str:
        """Translate a Miasm register name to Focaccia's naming convention.

        The name is upper-cased; names listed in `miasm_flag_aliases` are
        then replaced by their alias.
        """
        upper = regname.upper()
        aliases = MiasmSymbolResolver.miasm_flag_aliases
        return aliases.get(upper, upper)

    def resolve_register(self, regname: str) -> int | None:
        """Read a register's value from the state.

        :param regname: The register's name as used by Miasm.
        :return: The register's value, or `None` if the state raises a
                 `RegisterAccessError`.
        """
        focaccia_name = self._miasm_to_regname(regname)
        try:
            value = self._state.read_register(focaccia_name)
        except RegisterAccessError:
            return None
        else:
            return value

    def resolve_memory(self, addr: int, size: int) -> bytes | None:
        """Read `size` bytes of memory at `addr` from the state.

        :return: The bytes read, or `None` if the state raises a
                 `MemoryAccessError`.
        """
        try:
            data = self._state.read_memory(addr, size)
        except MemoryAccessError:
            return None
        else:
            return data

    def resolve_location(self, loc: LocKey) -> int | None:
        """Look up the offset of a location key in the location database."""
        offset = self._loc_db.get_location_offset(loc)
        return offset

def eval_expr(expr: Expr, conc_state: MiasmSymbolResolver) -> Expr:
    """Evaluate a symbolic expression against a concrete reference state.

    The expression is dispatched on its exact class to one of the `_eval_*`
    handlers, and the handler's result is passed through this module's
    expression simplifier.

    :param expr:       An expression to evaluate.
    :param conc_state: The concrete reference state from which symbolic
                       register and memory state is resolved.

    :return: The most simplified and concrete representation of `expr` that
             is producible with the values from `conc_state`. Is guaranteed to
             be either an `ExprInt` or an `ExprLoc` *if* `conc_state` only
             returns concrete register- and memory values.
    :raise TypeError: If the class of `expr` has no handler.
    """
    # One handler per expression class. Most of the handlers are adapted
    # from members of Miasm's `SymbolicExecutionEngine`.
    handlers: dict[type[Expr], Callable] = {
        ExprCompose: _eval_exprcompose,
        ExprOp:      _eval_exprop,
        ExprCond:    _eval_exprcond,
        ExprSlice:   _eval_exprslice,
        ExprMem:     _eval_exprmem,
        ExprLoc:     _eval_exprloc,
        ExprId:      _eval_exprid,
        ExprInt:     _eval_exprint,
    }

    # Dispatch on the exact class; subclasses are deliberately not matched.
    kind = expr.__class__
    if kind not in handlers:
        raise TypeError("Unknown expr type")

    simplified = expr_simp(handlers[kind](expr, conc_state))
    assert(simplified is not None)
    return simplified

def _eval_exprint(expr: ExprInt, _):
    """Evaluate an ExprInt.

    An integer constant needs no resolution, so `expr` is returned unchanged.
    The second argument (the state passed by `eval_expr`) is ignored.
    """
    return expr

def _eval_exprid(expr: ExprId, state: MiasmSymbolResolver):
    """Evaluate an ExprId by resolving it as a register in `state`.

    :return: `expr` itself if the register cannot be resolved; an `ExprInt`
             of the identifier's size if it resolves to an `int`; otherwise
             whatever value the resolver returned.
    """
    resolved = state.resolve_register(expr.name)
    if resolved is None:
        # Unknown register: keep the symbol.
        return expr
    return ExprInt(resolved, expr.size) if isinstance(resolved, int) else resolved

def _eval_exprloc(expr: ExprLoc, state: MiasmSymbolResolver):
    """Evaluate an ExprLoc by resolving its location key in `state`.

    :return: An `ExprInt` holding the location's offset, or `expr` itself if
             the location has no known offset.
    """
    resolved = state.resolve_location(expr.loc_key)
    return expr if resolved is None else ExprInt(resolved, expr.size)

def _eval_exprmem(expr: ExprMem, state: MiasmSymbolResolver):
    """Evaluate an ExprMem by reading the referenced memory from `state`.

    The pointer expression is evaluated first. The memory is only read if the
    pointer evaluates to a concrete integer.

    :return: An `ExprInt` of the access size holding the memory content, or
             `expr` itself if the pointer is not concrete or the memory
             cannot be resolved.
    """
    # Memory is read in whole bytes.
    assert(expr.size % 8 == 0)

    pointer = eval_expr(expr.ptr, state)
    if not pointer.is_int():
        return expr
    assert(isinstance(pointer, ExprInt))

    address = int(pointer)
    num_bytes = expr.size // 8
    data = state.resolve_memory(address, num_bytes)
    if data is None:
        return expr

    assert(len(data) * 8 == expr.size)
    # NOTE(review): the bytes are interpreted as big-endian; confirm that
    # this matches the byte order in which the state returns memory.
    value = int.from_bytes(data, byteorder='big')
    return ExprInt(value, expr.size)

def _eval_exprcond(expr, state: MiasmSymbolResolver):
    """Evaluate an ExprCond using the current state.

    The condition and both branches are evaluated (in that order) and
    recombined into a new `ExprCond`.
    """
    parts = [eval_expr(sub, state)
             for sub in (expr.cond, expr.src1, expr.src2)]
    return ExprCond(*parts)

def _eval_exprslice(expr, state: MiasmSymbolResolver):
    """Evaluate an ExprSlice using the current state.

    The sliced expression is evaluated and re-sliced with the original
    bounds.
    """
    inner = eval_expr(expr.arg, state)
    start, stop = expr.start, expr.stop
    return ExprSlice(inner, start, stop)

def _eval_cpuid(rax: ExprInt, out_reg: ExprInt):
    """Evaluate the `x86_cpuid` operator by performing a real invocation of
    the CPUID instruction.

    :param rax:     The current value of RAX. Must be concrete.
    :param out_reg: An index in `[0, 4)` signaling which register's value
                    shall be returned. Must be concrete.

    :return: An `ExprInt` of the same size as `out_reg` holding the value of
             the selected output register.
    :raise ValueError: If `out_reg` is not a valid index into the values
                       returned by the CPUID invocation.
    """
    from cpuid import cpuid

    query = cpuid.CPUID()
    results = query(int(rax))

    index = int(out_reg)
    if index >= len(results):
        raise ValueError(f'Output register may not be {out_reg}.')
    return ExprInt(results[index], out_reg.size)

def _eval_exprop(expr, state: MiasmSymbolResolver):
    """Evaluate an ExprOp using the current state.

    All operands are evaluated first. The `x86_cpuid` operator is then
    special-cased: if both of its operands evaluated to concrete integers, it
    is replaced by the result of a real CPUID invocation (see `_eval_cpuid`).
    If an operand could not be resolved to a concrete integer, the operator
    is left symbolic, like any other operation on unresolved values.

    :param expr:  The operation to evaluate.
    :param state: The state from which symbols are resolved.

    :return: The result of the CPUID invocation for a fully concrete
             `x86_cpuid` operation; otherwise a new `ExprOp` with the same
             operator and the evaluated operands.
    """
    args = [eval_expr(arg, state) for arg in expr.args]

    # Special case: CPUID instruction
    # Evaluate the expression to a value obtained from an actual call to
    # the CPUID instruction. Can't do this in an expression simplifier plugin
    # because the arguments must be concrete.
    if expr.op == 'x86_cpuid':
        assert(len(args) == 2)
        leaf, out_reg = args
        if isinstance(leaf, ExprInt) and isinstance(out_reg, ExprInt):
            return _eval_cpuid(leaf, out_reg)
        # An operand is still symbolic (e.g. the state could not resolve a
        # register). A real CPUID call is impossible then, so fall through
        # and keep the operation symbolic instead of failing.

    return ExprOp(expr.op, *args)

def _eval_exprcompose(expr, state: MiasmSymbolResolver):
    """Evaluate an ExprCompose using the current state.

    Each component is evaluated in order and the results are recombined into
    a new `ExprCompose`.
    """
    return ExprCompose(*[eval_expr(part, state) for part in expr.args])