about summary refs log tree commit diff stats
path: root/miasm/ir/translators/smt2.py
blob: d3366b8bbaace1a3bb8529c2f1919762a6c9fb0b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
from builtins import map
from builtins import range
import logging

from miasm.ir.translators.translator import Translator
from miasm.expression.smt2_helper import *
from miasm.expression.expression import ExprCond, ExprInt


log = logging.getLogger("translator_smt2")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("[%(levelname)-8s]: %(message)s"))
log.addHandler(console_handler)
log.setLevel(logging.WARNING)

class SMT2Mem(object):
    """
    Memory abstraction for TranslatorSMT2. Memory elements are only accessed,
    never written. To give a concrete value for a given memory cell in a solver,
    add "mem32.get(address, size) == <value>" constraints to your equation.
    The endianness of memory accesses is handled accordingly to the "endianness"
    attribute.
    Note: Will have one memory space for each addressing size used.
    For example, if memory is accessed via 32 bits values and 16 bits values,
    these access will not occur in the same address space.

    Adapted from Z3Mem
    """

    def __init__(self, endianness="<", name="mem"):
        """Initializes an SMT2Mem object with a given @name and @endianness.
        @endianness: Endianness of memory representation. '<' for little endian,
            '>' for big endian.
        @name: name of memory Arrays generated. They will be named
            name+str(address size) (for example mem32, mem16...).
        """
        if endianness not in ['<', '>']:
            raise ValueError("Endianness should be '>' (big) or '<' (little)")
        self.endianness = endianness
        self.mems = {} # Address size -> SMT2 memory array
        self.name = name
        # initialise address size
        self.addr_size = 0

    def get_mem_array(self, size):
        """Returns an SMT Array used internally to represent memory for addresses
        of size @size.
        @size: integer, size in bit of addresses in the memory to get.
        Return an string with the name of the SMT array..
        """
        try:
            mem = self.mems[size]
        except KeyError:
            # Lazy instantiation
            self.mems[size] = self.name + str(size)
            mem = self.mems[size]
        return mem

    def __getitem__(self, addr):
        """One byte memory access. Different address sizes with the same value
        will result in different memory accesses.
        @addr: an SMT2 expression, the address to read.
        Return an SMT2 expression of size 8 bits representing a memory access.
        """
        size = self.addr_size
        mem = self.get_mem_array(size)
        return array_select(mem, addr)

    def get(self, addr, size, addr_size):
        """ Memory access at address @addr of size @size with
        address size @addr_size.
        @addr: an SMT2 expression, the address to read.
        @size: int, size of the read in bits.
        @addr_size: int, size of the address
        Return a SMT2 expression representing a memory access.
        """
        # set address size per read access
        self.addr_size = addr_size

        original_size = size
        if original_size % 8 != 0:
            # Size not aligned on 8bits -> read more than size and extract after
            size = ((original_size // 8) + 1) * 8
        res = self[addr]
        if self.is_little_endian():
            for i in range(1, size // 8):
                index = bvadd(addr, bit_vec_val(i, addr_size))
                res = bv_concat(self[index], res)
        else:
            for i in range(1, size // 8):
                res = bv_concat(res, self[index])
        if size == original_size:
            return res
        else:
            # Size not aligned, extract right sized result
            return bv_extract(original_size-1, 0, res)

    def is_little_endian(self):
        """True if this memory is little endian."""
        return self.endianness == "<"

    def is_big_endian(self):
        """True if this memory is big endian."""
        return not self.is_little_endian()


class TranslatorSMT2(Translator):
    """Translate a Miasm expression into an equivalent SMT2
    expression. Memory is abstracted via SMT2Mem.
    The result of from_expr will be an SMT2 expression.

    If you want to interact with the memory abstraction after the translation,
    you can instantiate your own SMT2Mem that will be equivalent to the one
    used by TranslatorSMT2.

    TranslatorSMT2 provides the creation of a valid SMT2 file. For this,
    it keeps track of the translated bit vectors.

    Adapted from TranslatorZ3
    """

    # Implemented language
    __LANG__ = "smt2"

    def __init__(self, endianness="<", loc_db=None, **kwargs):
        """Instance a SMT2 translator
        @endianness: (optional) memory endianness
        """
        super(TranslatorSMT2, self).__init__(**kwargs)
        # memory abstraction
        self._mem = SMT2Mem(endianness)
        # map of translated bit vectors
        self._bitvectors = dict()
        # symbol pool
        self.loc_db = loc_db

    def from_ExprInt(self, expr):
        return bit_vec_val(int(expr), expr.size)

    def from_ExprId(self, expr):
        if str(expr) not in self._bitvectors:
            self._bitvectors[str(expr)] = expr.size
        return str(expr)

    def from_ExprLoc(self, expr):
        loc_key = expr.loc_key
        if self.loc_db is None or self.loc_db.get_location_offset(loc_key) is None:
            if str(loc_key) not in self._bitvectors:
                self._bitvectors[str(loc_key)] = expr.size
            return str(loc_key)

        offset = self.loc_db.get_location_offset(loc_key)
        return bit_vec_val(str(offset), expr.size)

    def from_ExprMem(self, expr):
        addr = self.from_expr(expr.ptr)
        # size to read from memory
        size = expr.size
        # size of memory address
        addr_size = expr.ptr.size
        return self._mem.get(addr, size, addr_size)

    def from_ExprSlice(self, expr):
        res = self.from_expr(expr.arg)
        res = bv_extract(expr.stop-1, expr.start, res)
        return res

    def from_ExprCompose(self, expr):
        res = None
        for arg in expr.args:
            e = bv_extract(arg.size-1, 0, self.from_expr(arg))
            if res:
                res = bv_concat(e, res)
            else:
                res = e
        return res

    def from_ExprCond(self, expr):
        cond = self.from_expr(expr.cond)
        src1 = self.from_expr(expr.src1)
        src2 = self.from_expr(expr.src2)

        # (and (distinct cond (_ bv0 <size>)) true)
        zero = bit_vec_val(0, expr.cond.size)
        distinct = smt2_distinct(cond, zero)
        distinct_and = smt2_and(distinct, "true")

        # (ite ((and (distinct cond (_ bv0 <size>)) true) src1 src2))
        return smt2_ite(distinct_and, src1, src2)

    def from_ExprOp(self, expr):
        args = list(map(self.from_expr, expr.args))
        res = args[0]

        if len(args) > 1:
            for arg in args[1:]:
                if expr.op == "+":
                    res = bvadd(res, arg)
                elif expr.op == "-":
                    res = bvsub(res, arg)
                elif expr.op == "*":
                    res = bvmul(res, arg)
                elif expr.op == "/":
                    res = bvsdiv(res, arg)
                elif expr.op == "sdiv":
                    res = bvsdiv(res, arg)
                elif expr.op == "udiv":
                    res = bvudiv(res, arg)
                elif expr.op == "%":
                    res = bvsmod(res, arg)
                elif expr.op == "smod":
                    res = bvsmod(res, arg)
                elif expr.op == "umod":
                    res = bvurem(res, arg)
                elif expr.op == "&":
                    res = bvand(res, arg)
                elif expr.op == "^":
                    res = bvxor(res, arg)
                elif expr.op == "|":
                    res = bvor(res, arg)
                elif expr.op == "<<":
                    res = bvshl(res, arg)
                elif expr.op == ">>":
                    res = bvlshr(res, arg)
                elif expr.op == "a>>":
                    res = bvashr(res, arg)
                elif expr.op == "<<<":
                    res = bv_rotate_left(res, arg, expr.size)
                elif expr.op == ">>>":
                    res = bv_rotate_right(res, arg, expr.size)
                elif expr.op == "==":
                    res = self.from_expr(ExprCond(expr.args[0] - expr.args[1], ExprInt(0, 1), ExprInt(1, 1)))
                else:
                    raise NotImplementedError("Unsupported OP yet: %s" % expr.op)
        elif expr.op == 'parity':
            arg = bv_extract(7, 0, res)
            res = bit_vec_val(1, 1)
            for i in range(8):
                res = bvxor(res, bv_extract(i, i, arg))
        elif expr.op == '-':
            res = bvneg(res)
        elif expr.op == "cnttrailzeros":
            src = res
            size = expr.size
            size_smt2 = bit_vec_val(size, size)
            one_smt2 = bit_vec_val(1, size)
            zero_smt2 = bit_vec_val(0, size)
            # src & (1 << (size - 1))
            op = bvand(src, bvshl(one_smt2, bvsub(size_smt2, one_smt2)))
            # op != 0
            cond = smt2_distinct(op, zero_smt2)
            # ite(cond, size - 1, src)
            res = smt2_ite(cond, bvsub(size_smt2, one_smt2), src)
            for i in range(size - 2, -1, -1):
                # smt2 expression of i
                i_smt2 = bit_vec_val(i, size)
                # src & (1 << i)
                op = bvand(src, bvshl(one_smt2, i_smt2))
                # op != 0
                cond = smt2_distinct(op, zero_smt2)
                # ite(cond, i, res)
                res = smt2_ite(cond, i_smt2, res)
        elif expr.op == "cntleadzeros":
            src = res
            size = expr.size
            one_smt2 = bit_vec_val(1, size)
            zero_smt2 = bit_vec_val(0, size)
            # (src & 1) != 0
            cond = smt2_distinct(bvand(src, one_smt2), zero_smt2)
            # ite(cond, 0, src)
            res= smt2_ite(cond, zero_smt2, src)
            for i in range(size - 1, 0, -1):
                index = - i % size
                index_smt2 = bit_vec_val(index, size)
                # src & (1 << index)
                op = bvand(src, bvshl(one_smt2, index_smt2))
                # op != 0
                cond = smt2_distinct(op, zero_smt2)
                # ite(cond, index, res)
                value_smt2 = bit_vec_val(size - (index + 1), size)
                res = smt2_ite(cond, value_smt2, res)
        else:
            raise NotImplementedError("Unsupported OP yet: %s" % expr.op)

        return res

    def from_ExprAssign(self, expr):
        src = self.from_expr(expr.src)
        dst = self.from_expr(expr.dst)
        return smt2_assert(smt2_eq(src, dst))

    def to_smt2(self, exprs, logic="QF_ABV", model=False):
        """
        Converts a valid SMT2 file for a given list of
        SMT2 expressions.

        :param exprs: list of SMT2 expressions
        :param logic: SMT2 logic
        :param model: model generation flag
        :return: String of the SMT2 file
        """
        ret = ""
        ret += "(set-logic {})\n".format(logic)

        # define bit vectors
        for bv in self._bitvectors:
            size = self._bitvectors[bv]
            ret += "{}\n".format(declare_bv(bv, size))

        # define memory arrays
        for size in self._mem.mems:
            mem = self._mem.mems[size]
            ret += "{}\n".format(declare_array(mem, bit_vec(size), bit_vec(8)))

        # merge SMT2 expressions
        for expr in exprs:
            ret += expr + "\n"

        # define action
        ret += "(check-sat)\n"

        # enable model generation
        if model:
            ret += "(get-model)\n"

        return ret


# Register the class
Translator.register(TranslatorSMT2)