about summary refs log tree commit diff stats
path: root/miasm2/ir/symbexec_types.py
blob: 297c0c9e92165762a74ac36e6778c58374e6f327 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine
from miasm2.expression.simplifications import expr_simp
from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\
    ExprMem, ExprCond, ExprCompose, ExprOp

from miasm2.core.ctypesmngr import CTypeId


class SymbolicStateCTypes(StateEngine):
    """Store C types of symbols

    The symbols given at construction are frozen into a set of
    (expression, C type) pairs, which makes a state hashable and comparable.
    """

    def __init__(self, symbols):
        """Freeze @symbols into the state
        @symbols: dictionary linking expressions to their C type
        """
        self._symbols = frozenset(symbols.items())

    def __hash__(self):
        # Hash on the class as well, consistently with __eq__ which rejects
        # instances of another class
        return hash((self.__class__, self._symbols))

    def __str__(self):
        """Return one "dst = type" line per symbol, in sorted order"""
        out = []
        for dst, src in sorted(self._symbols):
            out.append("%s = %s" % (dst, src))
        return "\n".join(out)

    def __eq__(self, other):
        """Two states are equal if they have the same class and the same
        symbols/types association"""
        if self is other:
            return True
        if self.__class__ != other.__class__:
            return False
        return self.symbols == other.symbols

    def __ne__(self, other):
        # Python 2 does not derive "!=" from __eq__: without this method,
        # two equal states would still be reported as different by "!="
        return not self.__eq__(other)

    def __iter__(self):
        """Iterate over the (expression, C type) pairs of the state"""
        for dst, src in self._symbols:
            yield dst, src

    def merge(self, other):
        """Merge two symbolic states
        Only expressions with equal C types in both states are kept.
        @other: second symbolic state
        """
        symb_a = self.symbols.items()
        symb_b = other.symbols.items()
        # Keep the (expression, type) pairs present in both states
        symbols = dict(set(symb_a).intersection(symb_b))
        return self.__class__(symbols)

    @property
    def symbols(self):
        """Return the dictionary of known symbols' types
        A new dictionary is built on each access.
        """
        return dict(self._symbols)


class SymbExecCType(SymbolicExecutionEngine):
    """Engine of C types propagation
    WARNING: avoid memory aliases here!

    In this engine, evaluating an expression gives its C type (or None if the
    type is unknown), and self.symbols links expressions to their C type.
    """

    StateEngine = SymbolicStateCTypes
    # Name of the scratch identifier registered in the type analyzer in order
    # to hand an already computed type to chandler.expr_to_types
    OBJC_INTERNAL = "___OBJC___"

    def __init__(self, ir_arch,
                 symbols,
                 chandler,
                 func_read=None,
                 func_write=None,
                 sb_expr_simp=expr_simp):
        """
        @ir_arch, @func_read, @func_write, @sb_expr_simp: forwarded to
            SymbolicExecutionEngine
        @symbols: initial association between expressions and their C type
        @chandler: C handler; its type_analyzer and expr_to_types are used to
            resolve types
        """
        self.chandler = chandler
        # The parent engine is initialised with an empty symbol pool; the
        # state of this engine is the plain dictionary set just after
        super(SymbExecCType, self).__init__(ir_arch,
                                            {},
                                            func_read,
                                            func_write,
                                            sb_expr_simp)
        self.symbols = dict(symbols)
        types_mngr = self.chandler.type_analyzer.types_mngr
        # Integer types accepted as "offset" types. The order matters: lookups
        # by size return the first match, and the first entry ('int') is the
        # default one.
        offset_types = []
        for name in [('int',), ('long',),
                     ('long', 'long'),
                     ('char',), ('short',),

                     ('unsigned', 'char',), ('unsigned', 'short',),
                     ('unsigned', 'int',), ('unsigned', 'long',),
                     ('unsigned', 'long', 'long')]:
            objc = types_mngr.get_objc(CTypeId(*name))
            offset_types.append(objc)
        self.offset_types = offset_types

    def is_type_offset(self, objc):
        """Return True if @objc is char/short/int/long"""
        return objc in self.offset_types

    def get_tpye_int_by_size(self, size):
        """Return a char/short/int/long type with the size equal to @size
        Return None if no such type exists.
        @size: size in bit"""

        for objc in self.offset_types:
            if objc.size == size / 8:
                return objc
        return None

    def is_offset_list(self, types, size):
        """Return the corresponding char/short/int/long type of @size, if
        every type in the list @types is an offset type; return None
        otherwise. If no offset type matches @size, the default one (first
        entry of self.offset_types) is returned.
        @types: a list of c types
        @size: size in bit"""

        for arg_type in types:
            if not self.is_type_offset(arg_type):
                return None
        objc = self.get_tpye_int_by_size(size)
        if objc:
            return objc
        # default size
        objc = self.offset_types[0]
        return objc

    def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0):
        """
        Depth first evaluation of the C type of @expr:
            1. simplify the expression
            2. evaluate the type of the node's sons
            3. deduce the type of the node

        Return the C type of @expr, or None if it cannot be determined.
        @expr: expression to type
        @state: association between expressions and their C type
        @cache: association between already evaluated expressions and their
            type; it is only read here
        @level: recursion depth
        """

        expr = self.expr_simp(expr)

        if expr in cache:
            return cache[expr]
        elif expr in state:
            return state[expr]
        elif isinstance(expr, ExprInt):
            objc = self.get_tpye_int_by_size(expr.size)
            if objc is None:
                objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId('int'))
            return objc
        elif isinstance(expr, ExprId):
            # Identifiers known by @state have been handled above
            return None
        elif isinstance(expr, ExprMem):
            ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level + 1)
            if ptr is None:
                return None
            # Register the pointer type under the scratch identifier, then
            # ask the C handler for the type of the dereference
            self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = ptr
            ptr_expr = ExprId(self.OBJC_INTERNAL, expr.arg.size)
            objcs = self.chandler.expr_to_types(ExprMem(ptr_expr, expr.size))
            if objcs is None:
                return None
            objc = objcs[0]
            return objc
        elif isinstance(expr, ExprCond):
            # Only typed if both sources are offset types
            src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level + 1)
            src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level + 1)
            types = [src1, src2]
            objc = self.is_offset_list(types, expr.size)
            if objc:
                return objc
            return None
        elif isinstance(expr, ExprSlice):
            # The sliced expression is not inspected: only the size is used
            objc = self.get_tpye_int_by_size(expr.size)
            if objc is None:
                # default size
                objc = self.offset_types[0]
            return objc
        elif isinstance(expr, ExprOp):
            types = []
            for oarg in expr.args:
                arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1)
                types.append(arg)
            if None in types:
                return None
            objc = self.is_offset_list(types, expr.size)
            if objc:
                return objc
            # Find Base + int
            if expr.op != '+':
                return None
            args = list(expr.args)
            if not args[-1].is_int():
                # Without a constant offset there is nothing to resolve
                return None
            offset = args.pop()
            types.pop()
            if len(args) != 1:
                return None
            arg, arg_type = args.pop(), types.pop()
            # Register the base type under the scratch identifier, then ask
            # the C handler for the type of "base + offset"
            self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = arg_type
            ptr_expr = ExprId(self.OBJC_INTERNAL, arg.size)
            objcs = self.chandler.expr_to_types(ptr_expr + offset)
            if objcs is None:
                # Same handling as for memory accesses: unknown type
                return None
            return objcs[0]
        elif isinstance(expr, ExprCompose):
            # Only typed if every part is an offset type
            types = set()
            for oarg in expr.args:
                arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1)
                types.add(arg)
            objc = self.is_offset_list(types, expr.size)
            if objc:
                return objc
            return None
        else:
            raise TypeError("Unknown expr type")

    def get_state(self):
        """Return the current state of the SymbolicEngine"""
        return self.StateEngine(self.symbols)

    def eval_ir_expr(self, assignblk):
        """
        Evaluate AssignBlock on the current state
        Return an iterator on (destination, C type) pairs. Memory
        destinations are skipped.
        @assignblk: AssignBlock instance
        """
        pool_out = {}
        # Evaluation cache shared by every assignment of the block
        eval_cache = {}
        for dst, src in assignblk.iteritems():
            src = self.eval_expr(src, eval_cache)
            if isinstance(dst, ExprMem):
                continue
            elif isinstance(dst, ExprId):
                pool_out[dst] = src
            else:
                raise ValueError("affected zarb", str(dst))
        return pool_out.iteritems()

    def apply_change(self, dst, src):
        """Update the C type of @dst in the current state
        An unknown type (None) removes @dst from the state instead of being
        stored as a type.
        @dst: destination expression
        @src: C type of the assigned value, or None if unknown
        """
        if src is None:
            # Never keep a "dst -> None" entry: it would be part of the
            # state, and so of its comparisons and merges
            if dst in self.symbols:
                del self.symbols[dst]
        else:
            self.symbols[dst] = src

    def del_mem_above_stack(self, stack_ptr):
        """No stack deletion"""
        return