diff options
34 files changed, 1053 insertions, 1140 deletions
diff --git a/example/expression/basic_simplification.py b/example/expression/basic_simplification.py index 27c86096..17b1a35b 100644 --- a/example/expression/basic_simplification.py +++ b/example/expression/basic_simplification.py @@ -11,8 +11,7 @@ b = ExprId('ebx') exprs = [a + b - a, ExprInt32(0x12) + ExprInt32(0x30) - a, - ExprCompose([(a[:8], 0, 8), - (a[8:16], 8, 16)])] + ExprCompose(a[:8], a[8:16])] for e in exprs: print '*' * 40 diff --git a/example/expression/expr_grapher.py b/example/expression/expr_grapher.py index 22dff7cf..3137e6d2 100644 --- a/example/expression/expr_grapher.py +++ b/example/expression/expr_grapher.py @@ -8,7 +8,7 @@ c = ExprId("C") d = ExprId("D") m = ExprMem(a + b + c + a) -e1 = ExprCompose([(a + b - (c * a) / m | b, 0, 32), (a + m, 32, 64)]) +e1 = ExprCompose(a + b - (c * a) / m | b, a + m) e2 = ExprInt64(15) e = ExprCond(d, e1, e2)[0:32] diff --git a/example/expression/simplification_tools.py b/example/expression/simplification_tools.py index b2df58d3..9b8aeed5 100644 --- a/example/expression/simplification_tools.py +++ b/example/expression/simplification_tools.py @@ -25,11 +25,9 @@ i1 = ExprInt(uint32(0x1)) i2 = ExprInt(uint32(0x2)) cc = ExprCond(a, b, c) -o = ExprCompose([(a[:8], 8, 16), - (a[8:16], 0, 8)]) +o = ExprCompose(a[8:16], a[:8]) -o2 = ExprCompose([(a[8:16], 0, 8), - (a[:8], 8, 16)]) +o2 = ExprCompose(a[8:16], a[:8]) l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] l2 = l[::-1] @@ -56,7 +54,7 @@ print y == y.copy() print repr(y), repr(y.copy()) -z = ExprCompose([(a[5:5 + 8], 0, 8), (b[:16], 8, 24), (x[:8], 24, 32)]) +z = ExprCompose(a[5:5 + 8], b[:16], x[:8]) print z print z.copy() print z[:31].copy().visit(replace_expr) diff --git a/example/ida/symbol_exec.py b/example/ida/symbol_exec.py index 41c5c3bf..751f9a58 100644 --- a/example/ida/symbol_exec.py +++ b/example/ida/symbol_exec.py @@ -94,7 +94,7 @@ def symbolic_exec(): print "Run symbolic execution..." sb = symbexec(ira, machine.mn.regs.regs_init) - sb.emul_ir_blocs(ira, start) + sb.emul_ir_blocks(start) modified = {} for ident in sb.symbols.symbols_id: diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index e4dcdba6..365a17ec 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -26,8 +26,8 @@ symbols_init = ira.arch.regs.regs_init symb = symbexec(ira, symbols_init) # Emulate one IR basic block -## Emulation of several basic blocks can be done through .emul_ir_blocs -cur_addr = symb.emul_ir_bloc(ira, START_ADDR) +## Emulation of several basic blocks can be done through .emul_ir_blocks +cur_addr = symb.emul_ir_block(START_ADDR) # Modified elements print 'Modified registers:' diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index 88f9b680..9fee22af 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -75,12 +75,7 @@ def intra_bloc_flow_symbexec(ir_arch, flow_graph, irb): out_nodes = {} current_nodes = {} - symbols_init = {} - for r in ir_arch.arch.regs.all_regs_ids: - # symbols_init[r] = ir_arch.arch.regs.all_regs_ids_init[i] - x = ExprId(r.name, r.size) - x.is_term = True - symbols_init[r] = x + symbols_init = dict(ir_arch.arch.regs.all_regs_ids_init) sb = symbexec(ir_arch, dict(symbols_init)) sb.emulbloc(irb) diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index aff2d8ca..0bceb8dc 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -672,8 +672,7 @@ def nop(): @sbuild.parse def extr(arg1, arg2, arg3, arg4): - compose = m2_expr.ExprCompose([(arg2, 0, arg2.size), - (arg3, arg2.size, arg2.size+arg3.size)]) + compose = m2_expr.ExprCompose(arg2, arg3) arg1 = compose[int(arg4.arg):int(arg4)+arg1.size] mnemo_func = sbuild.functions diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 260531ac..c6f3dceb 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -481,7 +481,7 @@ def umull(ir, instr, a, b, c, d): def umlal(ir, instr, a, b, c, d): e = [] - r = c.zeroExtend(64) * d.zeroExtend(64) + ExprCompose([(a, 0, 32), (b, 32, 64)]) + r = c.zeroExtend(64) * d.zeroExtend(64) + ExprCompose(a, b) e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output @@ -497,7 +497,7 @@ def smull(ir, instr, a, b, c, d): def smlal(ir, instr, a, b, c, d): e = [] - r = c.signExtend(64) * d.signExtend(64) + ExprCompose([(a, 0, 32), (b, 32, 64)]) + r = c.signExtend(64) * d.signExtend(64) + ExprCompose(a, b) e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output @@ -910,14 +910,14 @@ def bfc(ir, instr, a, b, c): out = [] last = 0 if start: - out.append((a[:start], 0, start)) + out.append(a[:start]) last = start if stop - start: - out.append((ExprInt32(0)[last:stop], last, stop)) + out.append(ExprInt32(0)[last:stop]) last = stop if last < 32: - out.append((a[last:], last, 32)) - r = ExprCompose(out) + out.append(a[last:]) + r = ExprCompose(*out) e.append(ExprAff(a, r)) dst = None if PC in a.get_r(): @@ -927,10 +927,7 @@ def bfc(ir, instr, a, b, c): def rev(ir, instr, a, b): e = [] - c = ExprCompose([(b[:8], 24, 32), - (b[8:16], 16, 24), - (b[16:24], 8, 16), - (b[24:32], 0, 8)]) + c = ExprCompose(b[24:32], b[16:24], b[8:16], b[:8]) e.append(ExprAff(a, c)) return e @@ -1225,8 +1222,7 @@ class ir_arml(ir): # ir = get_mnemo_expr(self, self.name.lower(), *args) if len(args) and isinstance(args[-1], ExprOp): if args[-1].op == 'rrx': - args[-1] = ExprCompose( - [(args[-1].args[0][1:], 0, 31), (cf, 31, 32)]) + args[-1] = ExprCompose(args[-1].args[0][1:], cf) elif (args[-1].op in ['<<', '>>', '<<a', 'a>>', '<<<', '>>>'] and isinstance(args[-1].args[-1], ExprId)): args[-1] = ExprOp(args[-1].op, diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index d03772ca..dea822b4 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -99,7 +99,7 @@ def bne(arg1, arg2, arg3): def lui(arg1, arg2): """The immediate value @arg2 is shifted left 16 bits and stored in the register @arg1. The lower 16 bits are zeroes.""" - arg1 = ExprCompose([(i16(0), 0, 16), (arg2[:16], 16, 32)]) + arg1 = ExprCompose(i16(0), arg2[:16]) @sbuild.parse def nop(): @@ -251,10 +251,7 @@ def bgtz(arg1, arg2): @sbuild.parse def wsbh(arg1, arg2): - arg1 = ExprCompose([(arg2[8:16], 0, 8), - (arg2[0:8] , 8, 16), - (arg2[24:32], 16, 24), - (arg2[16:24], 24, 32)]) + arg1 = ExprCompose(arg2[8:16], arg2[0:8], arg2[24:32], arg2[16:24]) @sbuild.parse def rotr(arg1, arg2, arg3): diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index 169a631f..a99e500c 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -250,8 +250,7 @@ def call(ir, instr, a): def swpb(ir, instr, a): e = [] x, y = a[:8], a[8:16] - e.append(ExprAff(a, ExprCompose([(y, 0, 8), - (x, 8, 16)]))) + e.append(ExprAff(a, ExprCompose(y, x))) return e, [] @@ -330,8 +329,7 @@ def jmp(ir, instr, a): def rrc_w(ir, instr, a): e = [] - c = ExprCompose([(a[1:16], 0, 15), - (cf, 15, 16)]) + c = ExprCompose(a[1:16], cf) e.append(ExprAff(a, c)) e.append(ExprAff(cf, a[:1])) # e += update_flag_zn_r(c) @@ -347,8 +345,7 @@ def rrc_w(ir, instr, a): def rra_w(ir, instr, a): e = [] - c = ExprCompose([(a[1:16], 0, 15), - (a[15:16], 15, 16)]) + c = ExprCompose(a[1:16], a[15:16]) e.append(ExprAff(a, c)) # TODO: error in disasm microcorruption? # e.append(ExprAff(cf, a[:1])) @@ -406,18 +403,7 @@ mnemo_func = { } -composed_sr = ExprCompose([ - (cf, 0, 1), - (zf, 1, 2), - (nf, 2, 3), - (gie, 3, 4), - (cpuoff, 4, 5), - (osc, 5, 6), - (scg0, 6, 7), - (scg1, 7, 8), - (of, 8, 9), - (res, 9, 16), -]) +composed_sr = ExprCompose(cf, zf, nf, gie, cpuoff, osc, scg0, scg1, of, res) def ComposeExprAff(dst, src): diff --git a/miasm2/arch/sh4/regs.py b/miasm2/arch/sh4/regs.py index cfbca187..89ff42f8 100644 --- a/miasm2/arch/sh4/regs.py +++ b/miasm2/arch/sh4/regs.py @@ -78,7 +78,6 @@ all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] regs_init = {} for i, r in enumerate(all_regs_ids): - all_regs_ids_init[i].is_term = True regs_init[r] = all_regs_ids_init[i] regs_flt_expr = [] diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py index 18083a04..34585dae 100644 --- a/miasm2/arch/x86/regs.py +++ b/miasm2/arch/x86/regs.py @@ -430,7 +430,6 @@ all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] regs_init = {} for i, r in enumerate(all_regs_ids): - all_regs_ids_init[i].is_term = True regs_init[r] = all_regs_ids_init[i] regs_flt_expr = [float_st0, float_st1, float_st2, float_st3, diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 69c21ac8..5dc49efc 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -331,9 +331,7 @@ def xadd(ir, instr, a, b): def adc(ir, instr, a, b): e = [] - c = a + (b + m2_expr.ExprCompose([(m2_expr.ExprInt(0, a.size - 1), - 1, a.size), - (cf, 0, 1)])) + c = a + (b + m2_expr.ExprCompose(cf, m2_expr.ExprInt(0, a.size - 1))) e += update_flag_arith(c) e += update_flag_af(a, b, c) e += update_flag_add(a, b, c) @@ -355,9 +353,7 @@ def sub(ir, instr, a, b): def sbb(ir, instr, a, b): e = [] - c = a - (b + m2_expr.ExprCompose([(m2_expr.ExprInt(0, a.size - 1), - 1, a.size), - (cf, 0, 1)])) + c = a - (b + m2_expr.ExprCompose(cf, m2_expr.ExprInt(0, a.size - 1))) e += update_flag_arith(c) e += update_flag_af(a, b, c) e += update_flag_sub(a, b, c) @@ -898,25 +894,12 @@ def setalc(ir, instr): def bswap(ir, instr, a): e = [] if a.size == 16: - c = m2_expr.ExprCompose([(a[:8], 8, 16), - (a[8:16], 0, 8), - ]) + c = m2_expr.ExprCompose(a[8:16], a[:8]) elif a.size == 32: - c = m2_expr.ExprCompose([(a[:8], 24, 32), - (a[8:16], 16, 24), - (a[16:24], 8, 16), - (a[24:32], 0, 8), - ]) + c = m2_expr.ExprCompose(a[24:32], a[16:24], a[8:16], a[:8]) elif a.size == 64: - c = m2_expr.ExprCompose([(a[:8], 56, 64), - (a[8:16], 48, 56), - (a[16:24], 40, 48), - (a[24:32], 32, 40), - (a[32:40], 24, 32), - (a[40:48], 16, 24), - (a[48:56], 8, 16), - (a[56:64], 0, 8), - ]) + c = m2_expr.ExprCompose(a[56:64], a[48:56], a[40:48], a[32:40], + a[24:32], a[16:24], a[8:16], a[:8]) else: raise ValueError('the size DOES matter') e.append(m2_expr.ExprAff(a, c)) @@ -986,24 +969,18 @@ def scas(ir, instr, size): def compose_eflag(s=32): args = [] - regs = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1( - 0), af, m2_expr.ExprInt1(0), zf, nf, tf, i_f, df, of] - for i in xrange(len(regs)): - args.append((regs[i], i, i + 1)) - - args.append((iopl, 12, 14)) + args = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1(0), af, + m2_expr.ExprInt1(0), zf, nf, tf, i_f, df, of, iopl] if s == 32: - regs = [nt, m2_expr.ExprInt1(0), rf, vm, ac, vif, vip, i_d] + args += [nt, m2_expr.ExprInt1(0), rf, vm, ac, vif, vip, i_d] elif s == 16: - regs = [nt, m2_expr.ExprInt1(0)] + args += [nt, m2_expr.ExprInt1(0)] else: raise ValueError('unk size') - for i in xrange(len(regs)): - args.append((regs[i], i + 14, i + 15)) if s == 32: - args.append((m2_expr.ExprInt(0, 10), 22, 32)) - return m2_expr.ExprCompose(args) + args.append(m2_expr.ExprInt(0, 10)) + return m2_expr.ExprCompose(*args) def pushfd(ir, instr): @@ -1426,8 +1403,7 @@ def div(ir, instr, a): b = mRAX[instr.mode][:16] elif size in [16, 32, 64]: s1, s2 = mRDX[size], mRAX[size] - b = m2_expr.ExprCompose([(s2, 0, size), - (s1, size, size * 2)]) + b = m2_expr.ExprCompose(s2, s1) else: raise ValueError('div arg not impl', a) @@ -1436,8 +1412,7 @@ def div(ir, instr, a): # if 8 bit div, only ax is affected if size == 8: - e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose([(c_d[:8], 0, 8), - (c_r[:8], 8, 16)]))) + e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) else: e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) @@ -1454,8 +1429,7 @@ def idiv(ir, instr, a): b = mRAX[instr.mode][:16] elif size in [16, 32, 64]: s1, s2 = mRDX[size], mRAX[size] - b = m2_expr.ExprCompose([(s2, 0, size), - (s1, size, size * 2)]) + b = m2_expr.ExprCompose(s2, s1) else: raise ValueError('div arg not impl', a) @@ -1464,8 +1438,7 @@ def idiv(ir, instr, a): # if 8 bit div, only ax is affected if size == 8: - e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose([(c_d[:8], 0, 8), - (c_r[:8], 8, 16)]))) + e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) else: e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) @@ -2192,14 +2165,10 @@ def fyl2x(ir, instr): def fnstenv(ir, instr, a): e = [] # XXX TODO tag word, ... - status_word = m2_expr.ExprCompose([(m2_expr.ExprInt8(0), 0, 8), - (float_c0, 8, 9), - (float_c1, 9, 10), - (float_c2, 10, 11), - (float_stack_ptr, 11, 14), - (float_c3, 14, 15), - (m2_expr.ExprInt1(0), 15, 16), - ]) + status_word = m2_expr.ExprCompose(m2_expr.ExprInt8(0), + float_c0, float_c1, float_c2, + float_stack_ptr, float_c3, + m2_expr.ExprInt1(0)) s = instr.mode # The behaviour in 64bit is identical to 32 bit @@ -2497,15 +2466,15 @@ def fabs(ir, instr): def fnstsw(ir, instr, dst): args = [ # Exceptions -> 0 - (m2_expr.ExprInt8(0), 0, 8), - (float_c0, 8, 9), - (float_c1, 9, 10), - (float_c2, 10, 11), - (float_stack_ptr, 11, 14), - (float_c3, 14, 15), + m2_expr.ExprInt8(0), + float_c0, + float_c1, + float_c2, + float_stack_ptr, + float_c3, # B: FPU is not busy -> 0 - (m2_expr.ExprInt1(0), 15, 16)] - e = [m2_expr.ExprAff(dst, m2_expr.ExprCompose(args))] + m2_expr.ExprInt1(0)] + e = [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*args))] return e, [] @@ -2656,11 +2625,9 @@ def das(ir, instr): def aam(ir, instr, a): e = [] tempAL = mRAX[instr.mode][0:8] - newEAX = m2_expr.ExprCompose([ - (tempAL % a, 0, 8), - (tempAL / a, 8, 16), - (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size), - ]) + newEAX = m2_expr.ExprCompose(tempAL % a, + tempAL / a, + mRAX[instr.mode][16:]) e += [m2_expr.ExprAff(mRAX[instr.mode], newEAX)] e += update_flag_arith(newEAX) e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0))) @@ -2671,12 +2638,9 @@ def aad(ir, instr, a): e = [] tempAL = mRAX[instr.mode][0:8] tempAH = mRAX[instr.mode][8:16] - newEAX = m2_expr.ExprCompose([ - ((tempAL + (tempAH * a)) & m2_expr.ExprInt8(0xFF), 0, 8), - (m2_expr.ExprInt8(0), 8, 16), - (mRAX[instr.mode][16:], - 16, mRAX[instr.mode].size), - ]) + newEAX = m2_expr.ExprCompose((tempAL + (tempAH * a)) & m2_expr.ExprInt8(0xFF), + m2_expr.ExprInt8(0), + mRAX[instr.mode][16:]) e += [m2_expr.ExprAff(mRAX[instr.mode], newEAX)] e += update_flag_arith(newEAX) e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0))) @@ -2908,9 +2872,8 @@ def l_outs(ir, instr, size): def xlat(ir, instr): e = [] - a = m2_expr.ExprCompose([(m2_expr.ExprInt(0, 24), 8, 32), - (mRAX[instr.mode][0:8], 0, 8)]) - b = m2_expr.ExprMem(m2_expr.ExprOp('+', mRBX[instr.mode], a), 8) + a = mRAX[instr.mode][0:8].zeroExtend(mRBX[instr.mode].size) + b = m2_expr.ExprMem(mRBX[instr.mode] + a, 8) e.append(m2_expr.ExprAff(mRAX[instr.mode][0:8], b)) return e, [] @@ -3073,13 +3036,10 @@ def lgs(ir, instr, a, b): def lahf(ir, instr): e = [] - args = [] - regs = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1(0), af, + args = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1(0), af, m2_expr.ExprInt1(0), zf, nf] - for i in xrange(len(regs)): - args.append((regs[i], i, i + 1)) e.append( - m2_expr.ExprAff(mRAX[instr.mode][8:16], m2_expr.ExprCompose(args))) + m2_expr.ExprAff(mRAX[instr.mode][8:16], m2_expr.ExprCompose(*args))) return e, [] @@ -3128,11 +3088,9 @@ def l_str(ir, instr, a): def movd(ir, instr, a, b): e = [] if a in regs_mm_expr: - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32), - (m2_expr.ExprInt32(0), 32, 64)]))) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(b, m2_expr.ExprInt32(0)))) elif a in regs_xmm_expr: - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32), - (m2_expr.ExprInt(0, 96), 32, 128)]))) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(b, m2_expr.ExprInt(0, 96)))) else: e.append(m2_expr.ExprAff(a, b[:32])) return e, [] @@ -3187,8 +3145,7 @@ def wrmsr(ir, instr): msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32( 8) * mRCX[instr.mode][:32] e = [] - src = m2_expr.ExprCompose([(mRAX[instr.mode][:32], 0, 32), - (mRDX[instr.mode][:32], 32, 64)]) + src = m2_expr.ExprCompose(mRAX[instr.mode][:32], mRDX[instr.mode][:32]) e.append(m2_expr.ExprAff(m2_expr.ExprMem(msr_addr, 64), src)) return e, [] @@ -3216,17 +3173,14 @@ def vec_vertical_sem(op, elt_size, reg_size, a, b): assert(reg_size % elt_size == 0) n = reg_size / elt_size if op == '-': - ops = [( + ops = [ (a[i * elt_size:(i + 1) * elt_size] - - b[i * elt_size:(i + 1) * elt_size]), - i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)] + - b[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)] else: - ops = [(m2_expr.ExprOp(op, a[i * elt_size:(i + 1) * elt_size], - b[i * elt_size:(i + 1) * elt_size]), - i * elt_size, - (i + 1) * elt_size) for i in xrange(0, n)] + ops = [m2_expr.ExprOp(op, a[i * elt_size:(i + 1) * elt_size], + b[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)] - return m2_expr.ExprCompose(ops) + return m2_expr.ExprCompose(*ops) def float_vec_vertical_sem(op, elt_size, reg_size, a, b): @@ -3236,24 +3190,22 @@ def float_vec_vertical_sem(op, elt_size, reg_size, a, b): x_to_int, int_to_x = {32: ('float_to_int_%d', 'int_%d_to_float'), 64: ('double_to_int_%d', 'int_%d_to_double')}[elt_size] if op == '-': - ops = [(m2_expr.ExprOp(x_to_int % elt_size, - m2_expr.ExprOp(int_to_x % elt_size, a[i * elt_size:(i + 1) * elt_size]) - - m2_expr.ExprOp( - int_to_x % elt_size, b[i * elt_size:( - i + 1) * elt_size])), - i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)] + ops = [m2_expr.ExprOp(x_to_int % elt_size, + m2_expr.ExprOp(int_to_x % elt_size, a[i * elt_size:(i + 1) * elt_size]) - + m2_expr.ExprOp( + int_to_x % elt_size, b[i * elt_size:( + i + 1) * elt_size])) for i in xrange(0, n)] else: - ops = [(m2_expr.ExprOp(x_to_int % elt_size, - m2_expr.ExprOp(op, - m2_expr.ExprOp( - int_to_x % elt_size, a[i * elt_size:( - i + 1) * elt_size]), - m2_expr.ExprOp( - int_to_x % elt_size, b[i * elt_size:( - i + 1) * elt_size]))), - i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)] + ops = [m2_expr.ExprOp(x_to_int % elt_size, + m2_expr.ExprOp(op, + m2_expr.ExprOp( + int_to_x % elt_size, a[i * elt_size:( + i + 1) * elt_size]), + m2_expr.ExprOp( + int_to_x % elt_size, b[i * elt_size:( + i + 1) * elt_size]))) for i in xrange(0, n)] - return m2_expr.ExprCompose(ops) + return m2_expr.ExprCompose(*ops) def __vec_vertical_instr_gen(op, elt_size, sem): @@ -3558,8 +3510,7 @@ def movss(ir, instr, a, b): e.append(m2_expr.ExprAff(a, b[:32])) else: # Source Mem Destination XMM - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32), - (m2_expr.ExprInt(0, 96), 32, 128)]))) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(b, m2_expr.ExprInt(0, 96)))) return e, [] @@ -3624,19 +3575,18 @@ def ps_rl_ll(ir, instr, a, b, op, size): slices = [] for i in xrange(0, a.size, size): - slices.append((m2_expr.ExprOp(op, a[i:i + size], count[:size]), - i, i + size)) + slices.append(m2_expr.ExprOp(op, a[i:i + size], count[:size])) if isinstance(test, m2_expr.ExprInt): if int(test) == 0: - return [m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(slices))], [] + return [m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(*slices))], [] else: return [m2_expr.ExprAff(a, m2_expr.ExprInt(0, a.size))], [] e_zero = [m2_expr.ExprAff(a, m2_expr.ExprInt(0, a.size)), m2_expr.ExprAff(ir.IRDst, lbl_next)] e_do = [] - e.append(m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(slices))) + e.append(m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(*slices))) e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) return e, [irbloc(lbl_do.name, [e_do]), irbloc(lbl_zero.name, [e_zero])] @@ -3759,11 +3709,9 @@ def punpck(ir, instr, a, b, size, off): e = [] slices = [] for i in xrange(a.size / (2 * size)): - src1 = a[size * i + off: size * i + off + size] - src2 = b[size * i + off: size * i + off + size] - slices.append((src1, size * 2 * i, size * 2 * i + size)) - slices.append((src2, size * (2 * i + 1), size * (2 * i + 1) + size)) - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(slices))) + slices.append(a[size * i + off: size * i + off + size]) + slices.append(b[size * i + off: size * i + off + size]) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(*slices))) return e, [] @@ -3861,36 +3809,28 @@ def pextrq(ir, instr, a, b, c): def unpckhps(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[64:96], 0, 32), - (b[64:96], 32, 64), - (a[96:128], 64, 96), - (b[96:128], 96, 128)]) + src = m2_expr.ExprCompose(a[64:96], b[64:96], a[96:128], b[96:128]) e.append(m2_expr.ExprAff(a, src)) return e, [] def unpckhpd(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[64:128], 0, 64), - (b[64:128], 64, 128)]) + src = m2_expr.ExprCompose(a[64:128], b[64:128]) e.append(m2_expr.ExprAff(a, src)) return e, [] def unpcklps(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[0:32], 0, 32), - (b[0:32], 32, 64), - (a[32:64], 64, 96), - (b[32:64], 96, 128)]) + src = m2_expr.ExprCompose(a[0:32], b[0:32], a[32:64], b[32:64]) e.append(m2_expr.ExprAff(a, src)) return e, [] def unpcklpd(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[0:64], 0, 64), - (b[0:64], 64, 128)]) + src = m2_expr.ExprCompose(a[0:64], b[0:64]) e.append(m2_expr.ExprAff(a, src)) return e, [] @@ -3940,10 +3880,9 @@ def sqrt_gen(ir, instr, a, b, size): e = [] out = [] for i in b.size / size: - out.append((m2_expr.ExprOp('fsqrt' % size, - b[i * size: (i + 1) * size]), - i * size, (i + 1) * size)) - src = m2_expr.ExprCompose(out) + out.append(m2_expr.ExprOp('fsqrt' % size, + b[i * size: (i + 1) * size])) + src = m2_expr.ExprCompose(*out) e.append(m2_expr.ExprAff(a, src)) return e, [] @@ -3976,8 +3915,8 @@ def pmovmskb(ir, instr, a, b): e = [] out = [] for i in xrange(b.size / 8): - out.append((b[8 * i + 7:8 * (i + 1)], i, i + 1)) - src = m2_expr.ExprCompose(out) + out.append(b[8 * i + 7:8 * (i + 1)]) + src = m2_expr.ExprCompose(*out) e.append(m2_expr.ExprAff(a, src.zeroExtend(a.size))) return e, [] diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index ce327ce1..6ff390bb 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -16,7 +16,7 @@ class MiasmTransformer(ast.NodeTransformer): X if Y else Z -> ExprCond(Y, X, Z) 'X'(Y) -> ExprOp('X', Y) ('X' % Y)(Z) -> ExprOp('X' % Y, Z) - {a, b} -> ExprCompose([a, 0, a.size], [b, a.size, a.size + b.size]) + {a, b} -> ExprCompose(((a, 0, a.size), (b, a.size, a.size + b.size))) """ # Parsers @@ -95,27 +95,16 @@ class MiasmTransformer(ast.NodeTransformer): return call def visit_Set(self, node): - "{a, b} -> ExprCompose([a, 0, a.size], [b, a.size, a.size + b.size])" + "{a, b} -> ExprCompose(a, b)" if len(node.elts) == 0: return node # Recursive visit node = self.generic_visit(node) - new_elts = [] - index = ast.Num(n=0) - for elt in node.elts: - new_index = ast.BinOp(op=ast.Add(), left=index, - right=ast.Attribute(value=elt, - attr='size', - ctx=ast.Load())) - new_elts.append(ast.List(elts=[elt, index, new_index], - ctx=ast.Load())) - index = new_index return ast.Call(func=ast.Name(id='ExprCompose', ctx=ast.Load()), - args=[ast.List(elts=new_elts, - ctx=ast.Load())], + args=node.elts, keywords=[], starargs=None, kwargs=None) diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index d04530c3..324d5fea 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -114,28 +114,52 @@ class Expr(object): "Parent class for Miasm Expressions" - __slots__ = ["is_term", "is_simp", "is_canon", - "is_eval", "_hash", "_repr", "_size", - "is_var_ident"] + __slots__ = ["__hash", "__repr", "__size"] + all_exprs = set() + args2expr = {} + canon_exprs = set() + use_singleton = True def set_size(self, value): raise ValueError('size is not mutable') def __init__(self): - self.is_term = False # Terminal expression - self.is_simp = False # Expression already simplified - self.is_canon = False # Expression already canonised - self.is_eval = False # Expression already evalued - self.is_var_ident = False # Expression not identifier + self.__hash = None + self.__repr = None + self.__size = None - self._hash = None - self._repr = None + size = property(lambda self: self.__size) - size = property(lambda self: self._size) + @staticmethod + def get_object(cls, args): + if not cls.use_singleton: + return object.__new__(cls, args) + + expr = Expr.args2expr.get((cls, args)) + if expr is None: + expr = object.__new__(cls, args) + Expr.args2expr[(cls, args)] = expr + return expr + + def __new__(cls, *args, **kwargs): + expr = object.__new__(cls, *args, **kwargs) + return expr + + def get_is_canon(self): + return self in Expr.canon_exprs + + def set_is_canon(self, value): + assert(value is True) + Expr.canon_exprs.add(self) + + is_canon = property(get_is_canon, set_is_canon) # Common operations + def __str__(self): + raise NotImplementedError("Abstract Method") + def __getitem__(self, i): if not isinstance(i, slice): raise TypeError("Expression: Bad slice: %s" % i) @@ -153,14 +177,14 @@ class Expr(object): return False def __repr__(self): - if self._repr is None: - self._repr = self._exprrepr() - return self._repr + if self.__repr is None: + self.__repr = self._exprrepr() + return self.__repr def __hash__(self): - if self._hash is None: - self._hash = self._exprhash() - return self._hash + if self.__hash is None: + self.__hash = self._exprhash() + return self.__hash def pre_eq(self, other): """Return True if ids are equal; @@ -264,8 +288,6 @@ class Expr(object): new_e = ExprOp(e.op, *args) else: new_e = e - elif isinstance(e, ExprCompose): - new_e = ExprCompose(canonize_expr_list_compose(e.args)) else: new_e = e new_e.is_canon = True @@ -287,8 +309,7 @@ class Expr(object): return self ad_size = size - self.size n = ExprInt(0, ad_size) - return ExprCompose([(self, 0, self.size), - (n, self.size, size)]) + return ExprCompose(self, n) def signExtend(self, size): """Sign extend to size @@ -298,12 +319,10 @@ class Expr(object): if self.size == size: return self ad_size = size - self.size - c = ExprCompose([(self, 0, self.size), - (ExprCond(self.msb(), - ExprInt(size2mask(ad_size), ad_size), - ExprInt(0, ad_size)), - self.size, size) - ]) + c = ExprCompose(self, + ExprCond(self.msb(), + ExprInt(size2mask(ad_size), ad_size), + ExprInt(0, ad_size))) return c def graph_recursive(self, graph): @@ -341,7 +360,8 @@ class ExprInt(Expr): - Constant 0x12345678 on 32bits """ - __slots__ = ["_arg"] + __slots__ = Expr.__slots__ + ["__arg"] + def __init__(self, num, size=None): """Create an ExprInt from a modint or num/size @@ -351,31 +371,36 @@ class ExprInt(Expr): super(ExprInt, self).__init__() if is_modint(num): - self._arg = num - self._size = self.arg.size + self.__arg = num + self.__size = self.arg.size if size is not None and num.size != size: raise RuntimeError("size must match modint size") elif size is not None: - self._arg = mod_size2uint[size](num) - self._size = self.arg.size + self.__arg = mod_size2uint[size](num) + self.__size = self.arg.size else: raise ValueError('arg must by modint or (int,size)! %s' % num) - arg = property(lambda self: self._arg) + size = property(lambda self: self.__size) + arg = property(lambda self: self.__arg) - def __eq__(self, other): - res = self.pre_eq(other) - if res is not None: - return res - return (self._arg == other._arg and - self._size == other._size) + def __getstate__(self): + return int(self.__arg), self.__size + + def __setstate__(self, state): + self.__init__(*state) + + def __new__(cls, arg, size=None): + if size is None: + size = arg.size + return Expr.get_object(cls, (arg, size)) def __get_int(self): "Return self integer representation" - return int(self._arg & size2mask(self._size)) + return int(self.__arg & size2mask(self.__size)) def __str__(self): - if self._arg < 0: + if self.__arg < 0: return str("-0x%X" % (- self.__get_int())) else: return str("0x%X" % self.__get_int()) @@ -390,10 +415,10 @@ class ExprInt(Expr): return set() def _exprhash(self): - return hash((EXPRINT, self._arg, self._size)) + return hash((EXPRINT, self.__arg, self.__size)) def _exprrepr(self): - return "%s(%r)" % (self.__class__.__name__, self._arg) + return "%s(0x%X)" % (self.__class__.__name__, self.__get_int()) def __contains__(self, e): return self == e @@ -403,7 +428,7 @@ class ExprInt(Expr): return self def copy(self): - return ExprInt(self._arg) + return ExprInt(self.__arg) def depth(self): return 1 @@ -428,7 +453,7 @@ class ExprId(Expr): - variable v1 """ - __slots__ = ["_name"] + __slots__ = Expr.__slots__ + ["__name"] def __init__(self, name, size=32): """Create an identifier @@ -437,19 +462,22 @@ class ExprId(Expr): """ super(ExprId, self).__init__() - self._name, self._size = name, size + self.__name, self.__size = name, size - name = property(lambda self: self._name) + size = property(lambda self: self.__size) + name = property(lambda self: self.__name) - def __eq__(self, other): - res = self.pre_eq(other) - if res is not None: - return res - return (self._name == other._name and - self._size == other._size) + def __getstate__(self): + return self.__name, self.__size + + def __setstate__(self, state): + self.__init__(*state) + + def __new__(cls, name, size=32): + return Expr.get_object(cls, (name, size)) def __str__(self): - return str(self._name) + return str(self.__name) def get_r(self, mem_read=False, cst_read=False): return set([self]) @@ -459,10 +487,10 @@ class ExprId(Expr): def _exprhash(self): # TODO XXX: hash size ?? - return hash((EXPRID, self._name, self._size)) + return hash((EXPRID, self.__name, self.__size)) def _exprrepr(self): - return "%s(%r, %d)" % (self.__class__.__name__, self._name, self._size) + return "%s(%r, %d)" % (self.__class__.__name__, self.__name, self.__size) def __contains__(self, e): return self == e @@ -472,7 +500,7 @@ class ExprId(Expr): return self def copy(self): - return ExprId(self._name, self._size) + return ExprId(self.__name, self.__size) def depth(self): return 1 @@ -489,7 +517,7 @@ class ExprAff(Expr): - var1 <- 2 """ - __slots__ = ["_src", "_dst"] + __slots__ = Expr.__slots__ + ["__dst", "__src"] def __init__(self, dst, src): """Create an ExprAff for dst <- src @@ -506,80 +534,75 @@ class ExprAff(Expr): if isinstance(dst, ExprSlice): # Complete the source with missing slice parts - self._dst = dst.arg + self.__dst = dst.arg rest = [(ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) for r in dst.slice_rest()] all_a = [(src, dst.start, dst.stop)] + rest all_a.sort(key=lambda x: x[1]) - self._src = ExprCompose(all_a) + args = [expr for (expr, _, _) in all_a] + self.__src = ExprCompose(*args) else: - self._dst, self._src = dst, src + self.__dst, self.__src = dst, src + + self.__size = self.dst.size - self._size = self.dst.size + size = property(lambda self: self.__size) + dst = property(lambda self: self.__dst) + src = property(lambda self: self.__src) - dst = property(lambda self: self._dst) - src = property(lambda self: self._src) + def __getstate__(self): + return self.__dst, self.__src + + def __setstate__(self, state): + self.__init__(*state) + + def __new__(cls, dst, src): + return Expr.get_object(cls, (dst, src)) def __str__(self): - return "%s = %s" % (str(self._dst), str(self._src)) + return "%s = %s" % (str(self.__dst), str(self.__src)) def get_r(self, mem_read=False, cst_read=False): - elements = self._src.get_r(mem_read, cst_read) - if isinstance(self._dst, ExprMem) and mem_read: - elements.update(self._dst.arg.get_r(mem_read, cst_read)) + elements = self.__src.get_r(mem_read, cst_read) + if isinstance(self.__dst, ExprMem) and mem_read: + elements.update(self.__dst.arg.get_r(mem_read, cst_read)) return elements def get_w(self): - if isinstance(self._dst, ExprMem): - return set([self._dst]) # [memreg] + if isinstance(self.__dst, ExprMem): + return set([self.__dst]) # [memreg] else: - return self._dst.get_w() + return self.__dst.get_w() def _exprhash(self): - return hash((EXPRAFF, hash(self._dst), hash(self._src))) + return hash((EXPRAFF, hash(self.__dst), hash(self.__src))) def _exprrepr(self): - return "%s(%r, %r)" % (self.__class__.__name__, self._dst, self._src) + return "%s(%r, %r)" % (self.__class__.__name__, self.__dst, self.__src) - def __contains__(self, e): - return self == e or self._src.__contains__(e) or self._dst.__contains__(e) - - # XXX /!\ for hackish expraff to slice - def get_modified_slice(self): - """Return an Expr list of extra expressions needed during the - object instanciation""" - - dst = self._dst - if not isinstance(self._src, ExprCompose): - raise ValueError("Get mod slice not on expraff slice", str(self)) - modified_s = [] - for arg in self._src.args: - if (not isinstance(arg[0], ExprSlice) or - arg[0].arg != dst or - arg[1] != arg[0].start or - arg[2] != arg[0].stop): - # If x is not the initial expression - modified_s.append(arg) - return modified_s + def __contains__(self, expr): + return (self == expr or + self.__src.__contains__(expr) or + self.__dst.__contains__(expr)) @visit_chk def visit(self, cb, tv=None): - dst, src = self._dst.visit(cb, tv), self._src.visit(cb, tv) - if dst == self._dst and src == self._src: + dst, src = self.__dst.visit(cb, tv), self.__src.visit(cb, tv) + if dst == self.__dst and src == self.__src: return self else: return ExprAff(dst, src) def copy(self): - return ExprAff(self._dst.copy(), self._src.copy()) + return ExprAff(self.__dst.copy(), self.__src.copy()) def depth(self): - return max(self._src.depth(), self._dst.depth()) + 1 + return max(self.__src.depth(), self.__dst.depth()) + 1 def graph_recursive(self, graph): graph.add_node(self) - for arg in [self._src, self._dst]: + for arg in [self.__src, self.__dst]: arg.graph_recursive(graph) graph.add_uniq_edge(self, arg) @@ -594,7 +617,7 @@ class ExprCond(Expr): - if (cond) then ... else ... """ - __slots__ = ["_cond", "_src1", "_src2"] + __slots__ = Expr.__slots__ + ["__cond", "__src1", "__src2"] def __init__(self, cond, src1, src2): """Create an ExprCond @@ -605,65 +628,74 @@ class ExprCond(Expr): super(ExprCond, self).__init__() + self.__cond, self.__src1, self.__src2 = cond, src1, src2 assert(src1.size == src2.size) + self.__size = self.src1.size - self._cond, self._src1, self._src2 = cond, src1, src2 - self._size = self.src1.size + size = property(lambda self: self.__size) + cond = property(lambda self: self.__cond) + src1 = property(lambda self: self.__src1) + src2 = property(lambda self: self.__src2) - cond = property(lambda self: self._cond) - src1 = property(lambda self: self._src1) - src2 = property(lambda self: self._src2) + def __getstate__(self): + return self.__cond, self.__src1, self.__src2 + + def __setstate__(self, state): + self.__init__(*state) + + def __new__(cls, cond, src1, src2): + return Expr.get_object(cls, (cond, src1, src2)) def __str__(self): - return "(%s?(%s,%s))" % (str(self._cond), str(self._src1), str(self._src2)) + return "(%s?(%s,%s))" % (str(self.__cond), str(self.__src1), str(self.__src2)) def get_r(self, mem_read=False, cst_read=False): - out_src1 = self._src1.get_r(mem_read, cst_read) - out_src2 = self._src2.get_r(mem_read, cst_read) - return self._cond.get_r(mem_read, - cst_read).union(out_src1).union(out_src2) + out_src1 = self.src1.get_r(mem_read, cst_read) + out_src2 = self.src2.get_r(mem_read, cst_read) + return self.cond.get_r(mem_read, + cst_read).union(out_src1).union(out_src2) def get_w(self): return set() def _exprhash(self): return hash((EXPRCOND, hash(self.cond), - hash(self._src1), hash(self._src2))) + hash(self.__src1), hash(self.__src2))) def _exprrepr(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, - self._cond, self._src1, self._src2) + self.__cond, self.__src1, self.__src2) def __contains__(self, e): return (self == e or - self._cond.__contains__(e) or - self._src1.__contains__(e) or - self._src2.__contains__(e)) + self.cond.__contains__(e) or + self.src1.__contains__(e) or + self.src2.__contains__(e)) @visit_chk def visit(self, cb, tv=None): - cond = self._cond.visit(cb, tv) - src1 = self._src1.visit(cb, tv) - src2 = self._src2.visit(cb, tv) - if (cond == self._cond and - src1 == self._src1 and - src2 == self._src2): + cond = self.__cond.visit(cb, tv) + src1 = self.__src1.visit(cb, tv) + src2 = self.__src2.visit(cb, tv) + if (cond == self.__cond and + src1 == self.__src1 and + src2 == self.__src2): return self return ExprCond(cond, src1, src2) def copy(self): - return ExprCond(self._cond.copy(), - self._src1.copy(), - self._src2.copy()) + return ExprCond(self.__cond.copy(), + self.__src1.copy(), + self.__src2.copy()) def depth(self): - return max(self._cond.depth(), - self._src1.depth(), - self._src2.depth()) + 1 + return max(self.__cond.depth(), + self.__src1.depth(), + self.__src2.depth()) + 1 def graph_recursive(self, graph): graph.add_node(self) - for arg in [self._cond, self._src1, self._src2]: + for arg in [self.__cond, self.__src1, self.__src2]: arg.graph_recursive(graph) graph.add_uniq_edge(self, arg) @@ -677,7 +709,7 @@ class ExprMem(Expr): - Memory write """ - __slots__ = ["_arg", "_size"] + __slots__ = Expr.__slots__ + ["__arg"] def __init__(self, arg, size=32): """Create an ExprMem @@ -691,16 +723,26 @@ class ExprMem(Expr): raise ValueError( 'ExprMem: arg must be an Expr (not %s)' % type(arg)) - self._arg, self._size = arg, size + self.__arg, self.__size = arg, size + + size = property(lambda self: self.__size) + arg = property(lambda self: self.__arg) - arg = property(lambda self: self._arg) + def __getstate__(self): + return self.__arg, self.__size + + def __setstate__(self, state): + self.__init__(*state) + + def __new__(cls, arg, size=32): + return Expr.get_object(cls, (arg, size)) def __str__(self): - return "@%d[%s]" % (self._size, str(self._arg)) + return "@%d[%s]" % (self.size, str(self.arg)) def get_r(self, mem_read=False, cst_read=False): if mem_read: - return set(self._arg.get_r(mem_read, cst_read).union(set([self]))) + return set(self.__arg.get_r(mem_read, cst_read).union(set([self]))) else: return set([self]) @@ -708,36 +750,36 @@ class ExprMem(Expr): return set([self]) # [memreg] def _exprhash(self): - return hash((EXPRMEM, hash(self._arg), self._size)) + return hash((EXPRMEM, hash(self.__arg), self.__size)) def _exprrepr(self): return "%s(%r, %r)" % (self.__class__.__name__, - self._arg, self._size) + self.__arg, self.__size) - def __contains__(self, e): - return self == e or self._arg.__contains__(e) + def __contains__(self, expr): + return self == expr or self.__arg.__contains__(expr) @visit_chk def visit(self, cb, tv=None): - arg = self._arg.visit(cb, tv) - if arg == self._arg: + arg = self.__arg.visit(cb, tv) + if arg == self.__arg: return self - return ExprMem(arg, self._size) + return ExprMem(arg, self.size) def copy(self): - arg = self._arg.copy() - return ExprMem(arg, size=self._size) + arg = self.arg.copy() + return ExprMem(arg, size=self.size) def is_op_segm(self): - return isinstance(self._arg, ExprOp) and self._arg.op == 'segm' + return isinstance(self.__arg, ExprOp) and self.__arg.op == 'segm' def depth(self): - return self._arg.depth() + 1 + return self.__arg.depth() + 1 def graph_recursive(self, graph): graph.add_node(self) - self._arg.graph_recursive(graph) - graph.add_uniq_edge(self, self._arg) + self.__arg.graph_recursive(graph) + graph.add_uniq_edge(self, self.__arg) class ExprOp(Expr): @@ -750,7 +792,7 @@ class ExprOp(Expr): - parity bit(var1) """ - __slots__ = ["_op", "_args"] + __slots__ = Expr.__slots__ + ["__op", "__args"] def __init__(self, op, *args): """Create an ExprOp @@ -772,44 +814,44 @@ class ExprOp(Expr): if not isinstance(op, str): raise ValueError("ExprOp: 'op' argument must be a string") - self._op, self._args = op, tuple(args) + self.__op, self.__args = op, tuple(args) # Set size for special cases - if self._op in [ + if self.__op in [ '==', 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', 'fxam_c0', 'fxam_c1', 'fxam_c2', 'fxam_c3', "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", "ucomiss_zf", "ucomiss_pf", "ucomiss_cf"]: sz = 1 - elif self._op in [TOK_INF, TOK_INF_SIGNED, - TOK_INF_UNSIGNED, TOK_INF_EQUAL, - TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, - TOK_EQUAL, TOK_POS, - TOK_POS_STRICT, - ]: + elif self.__op in [TOK_INF, TOK_INF_SIGNED, + TOK_INF_UNSIGNED, TOK_INF_EQUAL, + TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, + TOK_EQUAL, TOK_POS, + TOK_POS_STRICT, + ]: sz = 1 - elif self._op in ['mem_16_to_double', 'mem_32_to_double', - 'mem_64_to_double', 'mem_80_to_double', - 'int_16_to_double', 'int_32_to_double', - 'int_64_to_double', 'int_80_to_double']: + elif self.__op in ['mem_16_to_double', 'mem_32_to_double', + 'mem_64_to_double', 'mem_80_to_double', + 'int_16_to_double', 'int_32_to_double', + 'int_64_to_double', 'int_80_to_double']: sz = 64 - elif self._op in ['double_to_mem_16', 'double_to_int_16', - 'float_trunc_to_int_16', 'double_trunc_to_int_16']: + elif self.__op in ['double_to_mem_16', 'double_to_int_16', + 'float_trunc_to_int_16', 'double_trunc_to_int_16']: sz = 16 - elif self._op in ['double_to_mem_32', 'double_to_int_32', - 'float_trunc_to_int_32', 'double_trunc_to_int_32', - 'double_to_float']: + elif self.__op in ['double_to_mem_32', 'double_to_int_32', + 'float_trunc_to_int_32', 'double_trunc_to_int_32', + 'double_to_float']: sz = 32 - elif self._op in ['double_to_mem_64', 'double_to_int_64', - 'float_trunc_to_int_64', 'double_trunc_to_int_64', - 'float_to_double']: + elif self.__op in ['double_to_mem_64', 'double_to_int_64', + 'float_trunc_to_int_64', 'double_trunc_to_int_64', + 'float_to_double']: sz = 64 - elif self._op in ['double_to_mem_80', 'double_to_int_80', - 'float_trunc_to_int_80', - 'double_trunc_to_int_80']: + elif self.__op in ['double_to_mem_80', 'double_to_int_80', + 'float_trunc_to_int_80', + 'double_trunc_to_int_80']: sz = 80 - elif self._op in ['segm']: - sz = self._args[1].size + elif self.__op in ['segm']: + sz = self.__args[1].size else: if None in sizes: sz = None @@ -817,256 +859,275 @@ class ExprOp(Expr): # All arguments have the same size sz = list(sizes)[0] - self._size = sz + self.__size = sz - op = property(lambda self: self._op) - args = property(lambda self: self._args) + size = property(lambda self: self.__size) + op = property(lambda self: self.__op) + args = property(lambda self: self.__args) + + def __getstate__(self): + return self.__op, self.__args + + def __setstate__(self, state): + op, args = state + self.__init__(op, *args) + + def __new__(cls, op, *args): + return Expr.get_object(cls, (op, args)) def __str__(self): if self.is_associative(): - return '(' + self._op.join([str(arg) for arg in self._args]) + ')' - if (self._op.startswith('call_func_') or - self._op == 'cpuid' or - len(self._args) > 2 or - self._op in ['parity', 'segm']): - return self._op + '(' + ', '.join([str(arg) for arg in self._args]) + ')' - if len(self._args) == 2: - return ('(' + str(self._args[0]) + - ' ' + self.op + ' ' + str(self._args[1]) + ')') + return '(' + self.__op.join([str(arg) for arg in self.__args]) + ')' + if (self.__op.startswith('call_func_') or + self.__op == 'cpuid' or + len(self.__args) > 2 or + self.__op in ['parity', 'segm']): + return self.__op + '(' + ', '.join([str(arg) for arg in self.__args]) + ')' + if len(self.__args) == 2: + return ('(' + str(self.__args[0]) + + ' ' + self.op + ' ' + str(self.__args[1]) + ')') else: return reduce(lambda x, y: x + ' ' + str(y), - self._args, - '(' + str(self._op)) + ')' + self.__args, + '(' + str(self.__op)) + ')' def get_r(self, mem_read=False, cst_read=False): return reduce(lambda elements, arg: - elements.union(arg.get_r(mem_read, cst_read)), self._args, set()) + elements.union(arg.get_r(mem_read, cst_read)), self.__args, set()) def get_w(self): raise ValueError('op cannot be written!', self) def _exprhash(self): - h_hargs = [hash(arg) for arg in self._args] - return hash((EXPROP, self._op, tuple(h_hargs))) + h_hargs = [hash(arg) for arg in self.__args] + return hash((EXPROP, self.__op, tuple(h_hargs))) def _exprrepr(self): - return "%s(%r, %s)" % (self.__class__.__name__, self._op, - ', '.join(repr(arg) for arg in self._args)) + return "%s(%r, %s)" % (self.__class__.__name__, self.__op, + ', '.join(repr(arg) for arg in self.__args)) def __contains__(self, e): if self == e: return True - for arg in self._args: + for arg in self.__args: if arg.__contains__(e): return True return False def is_function_call(self): - return self._op.startswith('call') + return self.__op.startswith('call') def is_associative(self): "Return True iff current operation is associative" - return (self._op in ['+', '*', '^', '&', '|']) + return (self.__op in ['+', '*', '^', '&', '|']) def is_commutative(self): "Return True iff current operation is commutative" - return (self._op in ['+', '*', '^', '&', '|']) + return (self.__op in ['+', '*', '^', '&', '|']) @visit_chk def visit(self, cb, tv=None): - args = [arg.visit(cb, tv) for arg in self._args] - modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) + args = [arg.visit(cb, tv) for arg in self.__args] + modified = any([arg[0] != arg[1] for arg in zip(self.__args, args)]) if modified: - return ExprOp(self._op, *args) + return ExprOp(self.__op, *args) return self def copy(self): - args = [arg.copy() for arg in self._args] - return ExprOp(self._op, *args) + args = [arg.copy() for arg in self.__args] + return ExprOp(self.__op, *args) def depth(self): - depth = [arg.depth() for arg in self._args] + depth = [arg.depth() for arg in self.__args] return max(depth) + 1 def graph_recursive(self, graph): graph.add_node(self) - for arg in self._args: + for arg in self.__args: arg.graph_recursive(graph) graph.add_uniq_edge(self, arg) class ExprSlice(Expr): - __slots__ = ["_arg", "_start", "_stop"] + __slots__ = Expr.__slots__ + ["__arg", "__start", "__stop"] def __init__(self, arg, start, stop): super(ExprSlice, self).__init__() assert(start < stop) + self.__arg, self.__start, self.__stop = arg, start, stop + self.__size = self.__stop - self.__start - self._arg, self._start, self._stop = arg, start, stop - self._size = self._stop - self._start + size = property(lambda self: self.__size) + arg = property(lambda self: self.__arg) + start = property(lambda self: self.__start) + stop = property(lambda self: self.__stop) - arg = property(lambda self: self._arg) - start = property(lambda self: self._start) - stop = property(lambda self: self._stop) + def __getstate__(self): + return self.__arg, self.__start, self.__stop + + def __setstate__(self, state): + self.__init__(*state) + + def __new__(cls, arg, start, stop): + return Expr.get_object(cls, (arg, start, stop)) def __str__(self): - return "%s[%d:%d]" % (str(self._arg), self._start, self._stop) + return "%s[%d:%d]" % (str(self.__arg), self.__start, self.__stop) def get_r(self, mem_read=False, cst_read=False): - return self._arg.get_r(mem_read, cst_read) + return self.__arg.get_r(mem_read, cst_read) def get_w(self): - return self._arg.get_w() + return self.__arg.get_w() def _exprhash(self): - return hash((EXPRSLICE, hash(self._arg), self._start, self._stop)) + return hash((EXPRSLICE, hash(self.__arg), self.__start, self.__stop)) def _exprrepr(self): - return "%s(%r, %d, %d)" % (self.__class__.__name__, self._arg, - self._start, self._stop) + return "%s(%r, %d, %d)" % (self.__class__.__name__, self.__arg, + self.__start, self.__stop) - def __contains__(self, e): - if self == e: + def __contains__(self, expr): + if self == expr: return True - return self._arg.__contains__(e) + return self.__arg.__contains__(expr) @visit_chk def visit(self, cb, tv=None): - arg = self._arg.visit(cb, tv) - if arg == self._arg: + arg = self.__arg.visit(cb, tv) + if arg == self.__arg: return self - return ExprSlice(arg, self._start, self._stop) + return ExprSlice(arg, self.__start, self.__stop) def copy(self): - return ExprSlice(self._arg.copy(), self._start, self._stop) + return ExprSlice(self.__arg.copy(), self.__start, self.__stop) def depth(self): - return self._arg.depth() + 1 + return self.__arg.depth() + 1 def slice_rest(self): "Return the completion of the current slice" - size = self._arg.size - if self._start >= size or self._stop > size: + size = self.__arg.size + if self.__start >= size or self.__stop > size: raise ValueError('bad slice rest %s %s %s' % - (size, self._start, self._stop)) + (size, self.__start, self.__stop)) - if self._start == self._stop: + if self.__start == self.__stop: return [(0, size)] rest = [] - if self._start != 0: - rest.append((0, self._start)) - if self._stop < size: - rest.append((self._stop, size)) + if self.__start != 0: + rest.append((0, self.__start)) + if self.__stop < size: + rest.append((self.__stop, size)) return rest def graph_recursive(self, graph): graph.add_node(self) - self._arg.graph_recursive(graph) - graph.add_uniq_edge(self, self._arg) + self.__arg.graph_recursive(graph) + graph.add_uniq_edge(self, self.__arg) class ExprCompose(Expr): """ - Compose is like a hambuger. - It's arguments are tuple of: (Expression, start, stop) - start and stop are intergers, determining Expression position in the compose. - - Burger Example: - ExprCompose([(salad, 0, 3), (cheese, 3, 10), (beacon, 10, 16)]) - In the example, salad.size == 3. + Compose is like a hambuger. It concatenate Expressions """ - __slots__ = ["_args"] + __slots__ = Expr.__slots__ + ["__args"] - def __init__(self, args): + def __init__(self, *args): """Create an ExprCompose The ExprCompose is contiguous and starts at 0 - @args: tuple(Expr, int, int) + @args: [Expr, Expr, ...] + DEPRECATED: + @args: [(Expr, int, int), (Expr, int, int), ...] """ super(ExprCompose, self).__init__() - last_stop = 0 - args = sorted(args, key=itemgetter(1)) - for e, start, stop in args: - if e.size != stop - start: - raise ValueError( - "sanitycheck: ExprCompose args must have correct size!" + - " %r %r %r" % (e, e.size, stop - start)) - if last_stop != start: - raise ValueError( - "sanitycheck: ExprCompose args must be contiguous!" + - " %r" % (args)) - last_stop = stop + is_new_style = args and isinstance(args[0], Expr) + if not is_new_style: + warnings.warn('DEPRECATION WARNING: use "ExprCompose(a, b) instead of'+ + 'ExprCemul_ir_block(self, addr, step=False)" instead of emul_ir_bloc') + + self.__args = tuple(args) + self.__size = sum([arg.size for arg in args]) + + size = property(lambda self: self.__size) + args = property(lambda self: self.__args) - # Transform args to lists - o = [] - for e, a, b in args: - assert(a >= 0 and b >= 0) - o.append(tuple([e, a, b])) - self._args = tuple(o) + def __getstate__(self): + return self.__args - self._size = self._args[-1][2] + def __setstate__(self, state): + self.__init__(state) - args = property(lambda self: self._args) + def __new__(cls, *args): + is_new_style = args and isinstance(args[0], Expr) + if not is_new_style: + assert len(args) == 1 + args = args[0] + return Expr.get_object(cls, tuple(args)) def __str__(self): - return '{' + ', '.join(['%s,%d,%d' % - (str(arg[0]), arg[1], arg[2]) for arg in self._args]) + '}' + return '{' + ', '.join([str(arg) for arg in self.__args]) + '}' def get_r(self, mem_read=False, cst_read=False): return reduce(lambda elements, arg: - elements.union(arg[0].get_r(mem_read, cst_read)), self._args, set()) + elements.union(arg.get_r(mem_read, cst_read)), self.__args, set()) def get_w(self): return reduce(lambda elements, arg: - elements.union(arg[0].get_w()), self._args, set()) + elements.union(arg.get_w()), self.__args, set()) def _exprhash(self): - h_args = [EXPRCOMPOSE] + [(hash(arg[0]), arg[1], arg[2]) - for arg in self._args] + h_args = [EXPRCOMPOSE] + [hash(arg) for arg in self.__args] return hash(tuple(h_args)) def _exprrepr(self): - return "%s(%r)" % (self.__class__.__name__, self._args) + return "%s([%r])" % (self.__class__.__name__, self.__args) def __contains__(self, e): if self == e: return True - for arg in self._args: + for arg in self.__args: if arg == e: return True - if arg[0].__contains__(e): + if arg.__contains__(e): return True return False @visit_chk def visit(self, cb, tv=None): - args = [(arg[0].visit(cb, tv), arg[1], arg[2]) for arg in self._args] - modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) + args = [arg.visit(cb, tv) for arg in self.__args] + modified = any([arg != arg_new for arg, arg_new in zip(self.__args, args)]) if modified: - return ExprCompose(args) + return ExprCompose(*args) return self def copy(self): - args = [(arg[0].copy(), arg[1], arg[2]) for arg in self._args] - return ExprCompose(args) + args = [arg.copy() for arg in self.__args] + return ExprCompose(*args) def depth(self): - depth = [arg[0].depth() for arg in self._args] + depth = [arg.depth() for arg in self.__args] return max(depth) + 1 def graph_recursive(self, graph): graph.add_node(self) for arg in self.args: - arg[0].graph_recursive(graph) - graph.add_uniq_edge(self, arg[0]) + arg.graph_recursive(graph) + graph.add_uniq_edge(self, arg) + def iter_args(self): + index = 0 + for arg in self.__args: + yield index, arg + index += arg.size # Expression order for comparaison expr_order_dict = {ExprId: 1, @@ -1094,7 +1155,7 @@ def compare_exprs_compose(e1, e2): def compare_expr_list_compose(l1_e, l2_e): # Sort by list elements in incremental order, then by list size for i in xrange(min(len(l1_e), len(l2_e))): - x = compare_exprs_compose(l1_e[i], l2_e[i]) + x = compare_exprs(l1_e[i], l2_e[i]) if x: return x return cmp(len(l1_e), len(l2_e)) @@ -1325,9 +1386,7 @@ def MatchExpr(e, m, tks, result=None): if not isinstance(m, ExprCompose): return False for a1, a2 in zip(e.args, m.args): - if a1[1] != a2[1] or a1[2] != a2[2]: - return False - r = MatchExpr(a1[0], a2[0], tks, result) + r = MatchExpr(a1, a2, tks, result) if r is False: return False return result diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 0c661c2a..8babba70 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -34,103 +34,76 @@ def parity(a): return cpt -def merge_sliceto_slice(args): - sources = {} - non_slice = {} - sources_int = {} - for a in args: - if isinstance(a[0], m2_expr.ExprInt): - # sources_int[a.start] = a - # copy ExprInt because we will inplace modify arg just below - # /!\ TODO XXX never ever modify inplace args... - sources_int[a[1]] = (m2_expr.ExprInt(int(a[0]), - a[2] - a[1]), - a[1], - a[2]) - elif isinstance(a[0], m2_expr.ExprSlice): - if not a[0].arg in sources: - sources[a[0].arg] = [] - sources[a[0].arg].append(a) +def merge_sliceto_slice(expr): + """ + Apply basic factorisation on ExprCompose sub compoenents + @expr: ExprCompose + """ + + slices_raw = [] + other_raw = [] + integers_raw = [] + for index, arg in expr.iter_args(): + if isinstance(arg, m2_expr.ExprInt): + integers_raw.append((index, arg)) + elif isinstance(arg, m2_expr.ExprSlice): + slices_raw.append((index, arg)) else: - non_slice[a[1]] = a - # find max stop to determine size - max_size = None - for a in args: - if max_size is None or max_size < a[2]: - max_size = a[2] - - # first simplify all num slices - final_sources = [] - sorted_s = [] - for x in sources_int.values(): - x = list(x) - # mask int - v = x[0].arg & ((1 << (x[2] - x[1])) - 1) - x[0] = m2_expr.ExprInt_from(x[0], v) - x = tuple(x) - sorted_s.append((x[1], x)) - sorted_s.sort() - while sorted_s: - start, v = sorted_s.pop() - out = [m2_expr.ExprInt(v[0].arg), v[1], v[2]] - size = v[2] - v[1] - while sorted_s: - if sorted_s[-1][1][2] != start: + other_raw.append((index, arg)) + + # Find max stop to determine size + max_size = sum([arg.size for arg in expr.args]) + + integers_merged = [] + # Merge consecutive integers + while integers_raw: + index, arg = integers_raw.pop() + new_size = arg.size + value = int(arg) + while integers_raw: + prev_index, prev_value = integers_raw[-1] + # Check if intergers are consecutive + if prev_index + prev_value.size != index: break - s_start, s_stop = sorted_s[-1][1][1], sorted_s[-1][1][2] - size += s_stop - s_start - a = m2_expr.mod_size2uint[size]( - (int(out[0]) << (out[1] - s_start)) + - int(sorted_s[-1][1][0])) - out[0] = m2_expr.ExprInt(a) - sorted_s.pop() - out[1] = s_start - out[0] = m2_expr.ExprInt(int(out[0]), size) - final_sources.append((start, out)) - - final_sources_int = final_sources - # check if same sources have corresponding start/stop - # is slice AND is sliceto - simp_sources = [] - for args in sources.values(): - final_sources = [] - sorted_s = [] - for x in args: - sorted_s.append((x[1], x)) - sorted_s.sort() - while sorted_s: - start, v = sorted_s.pop() - ee = v[0].arg[v[0].start:v[0].stop] - out = ee, v[1], v[2] - while sorted_s: - if sorted_s[-1][1][2] != start: - break - if sorted_s[-1][1][0].stop != out[0].start: - break - - start = sorted_s[-1][1][1] - # out[0].start = sorted_s[-1][1][0].start - o_e, _, o_stop = out - o1, o2 = sorted_s[-1][1][0].start, o_e.stop - o_e = o_e.arg[o1:o2] - out = o_e, start, o_stop - # update _size - # out[0]._size = out[0].stop-out[0].start - sorted_s.pop() - out = out[0], start, out[2] - - final_sources.append((start, out)) + # Merge integers + index = prev_index + new_size += prev_value.size + value = value << prev_value.size + value |= int(prev_value) + integers_raw.pop() + integers_merged.append((index, m2_expr.ExprInt(value, new_size))) + + + slices_merged = [] + # Merge consecutive slices + while slices_raw: + index, arg = slices_raw.pop() + value, slice_start, slice_stop = arg.arg, arg.start, arg.stop + while slices_raw: + prev_index, prev_value = slices_raw[-1] + # Check if slices are consecutive + if prev_index + prev_value.size != index: + break + # Check if slices can ben merged + if prev_value.arg != value: + break + if prev_value.stop != slice_start: + break + # Merge slices + index = prev_index + slice_start = prev_value.start + slices_raw.pop() + slices_merged.append((index, value[slice_start:slice_stop])) - simp_sources += final_sources - simp_sources += final_sources_int + new_args = slices_merged + integers_merged + other_raw + new_args.sort() + for i, (index, arg) in enumerate(new_args[:-1]): + assert index + arg.size == new_args[i+1][0] + ret = [arg[1] for arg in new_args] - for i, v in non_slice.items(): - simp_sources.append((i, v)) + return ret - simp_sources.sort() - simp_sources = [x[1] for x in simp_sources] - return simp_sources op_propag_cst = ['+', '*', '^', '&', '|', '>>', @@ -210,9 +183,6 @@ class Variables_Identifier(object): - original expression with variables translated """ - # Attribute used to distinguish created variables from original ones - is_var_ident = "is_var_ident" - def __init__(self, expr, var_prefix="v"): """Set the expression @expr to handle and launch variable identification process @@ -287,13 +257,11 @@ class Variables_Identifier(object): for element_done in done: todo.remove(element_done) - @classmethod - def is_var_identifier(cls, expr): + def is_var_identifier(self, expr): "Return True iff @expr is a variable identifier" if not isinstance(expr, m2_expr.ExprId): return False - - return expr.is_var_ident + return expr in self._vars def find_variables_rec(self, expr): """Recursive method called by find_variable to expand @expr. @@ -310,7 +278,6 @@ class Variables_Identifier(object): identifier = m2_expr.ExprId("%s%s" % (self.var_prefix, self.var_indice.next()), size = expr.size) - identifier.is_var_ident = True self._vars[identifier] = expr # Recursion stop case @@ -333,8 +300,8 @@ class Variables_Identifier(object): self.find_variables_rec(expr.arg) elif isinstance(expr, m2_expr.ExprCompose): - for a in expr.args: - self.find_variables_rec(list(a)[0]) + for arg in expr.args: + self.find_variables_rec(arg) elif isinstance(expr, m2_expr.ExprSlice): self.find_variables_rec(expr.arg) @@ -455,21 +422,19 @@ class ExprRandom(object): """ # First layer upper_bound = random.randint(1, size) - args = [(cls._gen(size=upper_bound, depth=depth - 1), 0, upper_bound)] + args = [cls._gen(size=upper_bound, depth=depth - 1)] # Next layers while (upper_bound < size): if len(args) == (cls.compose_max_layer - 1): # We reach the maximum size - upper_bound = size + new_upper_bound = size else: - upper_bound = random.randint(args[-1][-1] + 1, size) + new_upper_bound = random.randint(upper_bound + 1, size) - args.append((cls._gen(size=upper_bound - args[-1][-1]), - args[-1][-1], - upper_bound)) - - return m2_expr.ExprCompose(args) + args.append(cls._gen(size=new_upper_bound - upper_bound)) + upper_bound = new_upper_bound + return m2_expr.ExprCompose(*args) @classmethod def memory(cls, size=32, depth=1): @@ -654,22 +619,17 @@ def possible_values(expr): elif isinstance(expr, m2_expr.ExprCompose): # Generate each possibility for sub-argument, associated with the start # and stop bit - consvals_args = [map(lambda x: (x, arg[1], arg[2]), - possible_values(arg[0])) + consvals_args = [map(lambda x: x, possible_values(arg)) for arg in expr.args] for consvals_possibility in itertools.product(*consvals_args): # Merge constraint of each sub-element - args_constraint = itertools.chain(*[consval[0].constraints + args_constraint = itertools.chain(*[consval.constraints for consval in consvals_possibility]) # Gen the corresponding constraints / ExprCompose + args = [consval.value for consval in consvals_possibility] consvals.add( ConstrainedValue(frozenset(args_constraint), - m2_expr.ExprCompose( - [(consval[0].value, - consval[1], - consval[2]) - for consval in consvals_possibility] - ))) + m2_expr.ExprCompose(*args))) else: raise RuntimeError("Unsupported type for expr: %s" % type(expr)) diff --git a/miasm2/expression/simplifications.py b/miasm2/expression/simplifications.py index cbffb219..dd4f5c04 100644 --- a/miasm2/expression/simplifications.py +++ b/miasm2/expression/simplifications.py @@ -48,6 +48,7 @@ class ExpressionSimplifier(object): def __init__(self): self.expr_simp_cb = {} + self.simplified_exprs = set() def enable_passes(self, passes): """Add passes from @passes @@ -80,7 +81,7 @@ class ExpressionSimplifier(object): @expression: Expr instance Return an Expr instance""" - if expression.is_simp: + if expression in self.simplified_exprs: return expression # Find a stable state @@ -92,10 +93,10 @@ class ExpressionSimplifier(object): # Launch recursivity expression = self.expr_simp_wrapper(e_new) - expression.is_simp = True - + self.simplified_exprs.add(expression) # Mark expression as simplified - e_new.is_simp = True + self.simplified_exprs.add(e_new) + return e_new def expr_simp_wrapper(self, expression, callback=None): @@ -104,13 +105,13 @@ class ExpressionSimplifier(object): @manual_callback: If set, call this function instead of normal one Return an Expr instance""" - if expression.is_simp: + if expression in self.simplified_exprs: return expression if callback is None: callback = self.expr_simp - return expression.visit(callback, lambda e: not(e.is_simp)) + return expression.visit(callback, lambda e: e not in self.simplified_exprs) def __call__(self, expression, callback=None): "Wrapper on expr_simp_wrapper" diff --git a/miasm2/expression/simplifications_common.py b/miasm2/expression/simplifications_common.py index 49dfbcc0..a070fb81 100644 --- a/miasm2/expression/simplifications_common.py +++ b/miasm2/expression/simplifications_common.py @@ -265,13 +265,14 @@ def simp_cst_propagation(e_s, e): args = new_args # A << int with A ExprCompose => move index - if op == "<<" and isinstance(args[0], ExprCompose) and isinstance(args[1], ExprInt): + if (op == "<<" and isinstance(args[0], ExprCompose) and + isinstance(args[1], ExprInt) and int(args[1]) != 0): final_size = args[0].size shift = int(args[1]) new_args = [] # shift indexes - for expr, start, stop in args[0].args: - new_args.append((expr, start+shift, stop+shift)) + for index, arg in args[0].iter_args(): + new_args.append((arg, index+shift, index+shift+arg.size)) # filter out expression filter_args = [] min_index = final_size @@ -281,12 +282,13 @@ def simp_cst_propagation(e_s, e): if stop > final_size: expr = expr[:expr.size - (stop - final_size)] stop = final_size - filter_args.append((expr, start, stop)) + filter_args.append(expr) min_index = min(start, min_index) # create entry 0 + assert min_index != 0 expr = ExprInt(0, min_index) - filter_args = [(expr, 0, min_index)] + filter_args - return ExprCompose(filter_args) + args = [expr] + filter_args + return ExprCompose(*args) # A >> int with A ExprCompose => move index if op == ">>" and isinstance(args[0], ExprCompose) and isinstance(args[1], ExprInt): @@ -294,8 +296,8 @@ def simp_cst_propagation(e_s, e): shift = int(args[1]) new_args = [] # shift indexes - for expr, start, stop in args[0].args: - new_args.append((expr, start-shift, stop-shift)) + for index, arg in args[0].iter_args(): + new_args.append((arg, index-shift, index+arg.size-shift)) # filter out expression filter_args = [] max_index = 0 @@ -305,29 +307,30 @@ def simp_cst_propagation(e_s, e): if start < 0: expr = expr[-start:] start = 0 - filter_args.append((expr, start, stop)) + filter_args.append(expr) max_index = max(stop, max_index) # create entry 0 expr = ExprInt(0, final_size - max_index) - filter_args += [(expr, max_index, final_size)] - return ExprCompose(filter_args) + args = filter_args + [expr] + return ExprCompose(*args) # Compose(a) OP Compose(b) with a/b same bounds => Compose(a OP b) if op in ['|', '&', '^'] and all([isinstance(arg, ExprCompose) for arg in args]): bounds = set() for arg in args: - bound = tuple([(start, stop) for (expr, start, stop) in arg.args]) + bound = tuple([expr.size for expr in arg.args]) bounds.add(bound) if len(bounds) == 1: bound = list(bounds)[0] - new_args = [[expr] for (expr, start, stop) in args[0].args] + new_args = [[expr] for expr in args[0].args] for sub_arg in args[1:]: - for i, (expr, start, stop) in enumerate(sub_arg.args): + for i, expr in enumerate(sub_arg.args): new_args[i].append(expr) + args = [] for i, arg in enumerate(new_args): - new_args[i] = ExprOp(op, *arg), bound[i][0], bound[i][1] - return ExprCompose(new_args) + args.append(ExprOp(op, *arg)) + return ExprCompose(*args) # <<<c_rez, >>>c_rez if op in [">>>c_rez", "<<<c_rez"]: @@ -448,40 +451,41 @@ def simp_slice(e_s, e): return new_e elif isinstance(e.arg, ExprCompose): # Slice(Compose(A), x) => Slice(A, y) - for a in e.arg.args: - if a[1] <= e.start and a[2] >= e.stop: - new_e = a[0][e.start - a[1]:e.stop - a[1]] + for index, arg in e.arg.iter_args(): + if index <= e.start and index+arg.size >= e.stop: + new_e = arg[e.start - index:e.stop - index] return new_e # Slice(Compose(A, B, C), x) => Compose(A, B, C) with truncated A/B/C out = [] - for arg, s_start, s_stop in e.arg.args: + for index, arg in e.arg.iter_args(): # arg is before slice start - if e.start >= s_stop: + if e.start >= index + arg.size: continue # arg is after slice stop - elif e.stop <= s_start: + elif e.stop <= index: continue # arg is fully included in slice - elif e.start <= s_start and s_stop <= e.stop: - out.append((arg, s_start - e.start, s_stop - e.start)) + elif e.start <= index and index + arg.size <= e.stop: + out.append(arg) continue # arg is truncated at start - if e.start > s_start: - slice_start = e.start - s_start + if e.start > index: + slice_start = e.start - index a_start = 0 else: # arg is not truncated at start slice_start = 0 - a_start = s_start - e.start + a_start = index - e.start # a is truncated at stop - if e.stop < s_stop: - slice_stop = arg.size + e.stop - s_stop - slice_start + if e.stop < index + arg.size: + slice_stop = arg.size + e.stop - (index + arg.size) - slice_start a_stop = e.stop - e.start else: slice_stop = arg.size - a_stop = s_stop - e.start - out.append((arg[slice_start:slice_stop], a_start, a_stop)) - return ExprCompose(out) + a_stop = index + arg.size - e.start + out.append(arg[slice_start:slice_stop]) + + return ExprCompose(*out) # ExprMem(x, size)[:A] => ExprMem(x, a) # XXXX todo hum, is it safe? @@ -533,68 +537,61 @@ def simp_slice(e_s, e): def simp_compose(e_s, e): "Commons simplification on ExprCompose" - args = merge_sliceto_slice(e.args) + args = merge_sliceto_slice(e) out = [] # compose of compose - for a in args: - if isinstance(a[0], ExprCompose): - for x, start, stop in a[0].args: - out.append((x, start + a[1], stop + a[1])) + for arg in args: + if isinstance(arg, ExprCompose): + out += arg.args else: - out.append(a) + out.append(arg) args = out # Compose(a) with a.size = compose.size => a - if len(args) == 1 and args[0][1] == 0 and args[0][2] == e.size: - return args[0][0] + if len(args) == 1 and args[0].size == e.size: + return args[0] # {(X[z:], 0, X.size-z), (0, X.size-z, X.size)} => (X >> z) if (len(args) == 2 and - isinstance(args[1][0], ExprInt) and - args[1][0].arg == 0): - a1 = args[0] - a2 = args[1] - if (isinstance(a1[0], ExprSlice) and - a1[1] == 0 and - a1[0].stop == a1[0].arg.size and - a2[1] == a1[0].size and - a2[2] == a1[0].arg.size): - new_e = a1[0].arg >> ExprInt( - a1[0].start, a1[0].arg.size) + isinstance(args[1], ExprInt) and + int(args[1]) == 0): + if (isinstance(args[0], ExprSlice) and + args[0].stop == args[0].arg.size and + args[0].size + args[1].size == args[0].arg.size): + new_e = args[0].arg >> ExprInt(args[0].start, args[0].arg.size) return new_e # Compose with ExprCond with integers for src1/src2 and intergers => # propagage integers # {XXX?(0x0,0x1)?(0x0,0x1),0,8, 0x0,8,32} => XXX?(int1, int2) - ok = True - expr_cond = None - expr_ints = [] - for i, a in enumerate(args): - if not is_int_or_cond_src_int(a[0]): + expr_cond_index = None + expr_ints_or_conds = [] + for i, arg in enumerate(args): + if not is_int_or_cond_src_int(arg): ok = False break - expr_ints.append(a) - if isinstance(a[0], ExprCond): - if expr_cond is not None: + expr_ints_or_conds.append(arg) + if isinstance(arg, ExprCond): + if expr_cond_index is not None: ok = False - expr_cond = i - cond = a[0] + expr_cond_index = i + cond = arg - if ok and expr_cond is not None: + if ok and expr_cond_index is not None: src1 = [] src2 = [] - for i, a in enumerate(expr_ints): - if i == expr_cond: - src1.append((a[0].src1, a[1], a[2])) - src2.append((a[0].src2, a[1], a[2])) + for i, arg in enumerate(expr_ints_or_conds): + if i == expr_cond_index: + src1.append(arg.src1) + src2.append(arg.src2) else: - src1.append(a) - src2.append(a) - src1 = e_s.apply_simp(ExprCompose(src1)) - src2 = e_s.apply_simp(ExprCompose(src2)) + src1.append(arg) + src2.append(arg) + src1 = e_s.apply_simp(ExprCompose(*src1)) + src2 = e_s.apply_simp(ExprCompose(*src2)) if isinstance(src1, ExprInt) and isinstance(src2, ExprInt): return ExprCond(cond.cond, src1, src2) - return ExprCompose(args) + return ExprCompose(*args) def simp_cond(e_s, e): diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index 2aa853f0..70fda11c 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -283,11 +283,8 @@ class ira(ir): def gen_equations(self): for irb in self.blocs.values(): - symbols_init = {} - for r in self.arch.regs.all_regs_ids: - x = ExprId(r.name, r.size) - x.is_term = True - symbols_init[r] = x + symbols_init = dict(self.arch.regs.all_regs_ids_init) + sb = symbexec(self, dict(symbols_init)) sb.emulbloc(irb) eqs = [] diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 3a841fa5..d8cccc64 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -46,7 +46,6 @@ class AssignBlock(dict): * if dst is an ExprSlice, expand it to affect the full Expression * if dst already known, sources are merged """ - if dst.size != src.size: raise RuntimeError( "sanitycheck: args must have same size! %s" % @@ -59,7 +58,8 @@ class AssignBlock(dict): for r in dst.slice_rest()] all_a = [(src, dst.start, dst.stop)] + rest all_a.sort(key=lambda x: x[1]) - new_src = m2_expr.ExprCompose(all_a) + args = [expr for (expr, _, _) in all_a] + new_src = m2_expr.ExprCompose(*args) else: new_dst, new_src = dst, src @@ -74,6 +74,7 @@ class AssignBlock(dict): expr_list = [(new_dst, new_src), (new_dst, self[new_dst])] # Find collision + print 'FIND COLISION' e_colision = reduce(lambda x, y: x.union(y), (self.get_modified_slice(dst, src) for (dst, src) in expr_list), @@ -95,7 +96,12 @@ class AssignBlock(dict): for interval in missing_i) # Build the merging expression - new_src = m2_expr.ExprCompose(e_colision.union(remaining)) + args = list(e_colision.union(remaining)) + args.sort(key=lambda x:x[1]) + starts = [start for (_, start, _) in args] + assert len(set(starts)) == len(starts) + args = [expr for (expr, _, _) in args] + new_src = m2_expr.ExprCompose(*args) super(AssignBlock, self).__setitem__(new_dst, new_src) @@ -103,17 +109,16 @@ class AssignBlock(dict): def get_modified_slice(dst, src): """Return an Expr list of extra expressions needed during the object instanciation""" - if not isinstance(src, m2_expr.ExprCompose): raise ValueError("Get mod slice not on expraff slice", str(self)) modified_s = [] - for arg in src.args: - if (not isinstance(arg[0], m2_expr.ExprSlice) or - arg[0].arg != dst or - arg[1] != arg[0].start or - arg[2] != arg[0].stop): + for index, arg in src.iter_args(): + if not (isinstance(arg, m2_expr.ExprSlice) and + arg.arg == dst and + index == arg.start and + index+arg.size == arg.stop): # If x is not the initial expression - modified_s.append(arg) + modified_s.append((arg, index, index+arg.size)) return modified_s def get_w(self): diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 1dc8dde1..db3eacdc 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -3,6 +3,10 @@ from miasm2.expression.modint import int32 from miasm2.expression.simplifications import expr_simp from miasm2.core import asmbloc from miasm2.ir.ir import AssignBlock +from miasm2.core.interval import interval +from miasm2.core.utils import get_caller_name +import warnings + import logging @@ -13,72 +17,82 @@ log.addHandler(console_handler) log.setLevel(logging.INFO) -class symbols(): +class symbols(object): def __init__(self, init=None): if init is None: init = {} self.symbols_id = {} self.symbols_mem = {} - for k, v in init.items(): - self[k] = v + for expr, value in init.items(): + self[expr] = value - def __contains__(self, a): - if not isinstance(a, m2_expr.ExprMem): - return self.symbols_id.__contains__(a) - if not self.symbols_mem.__contains__(a.arg): + def __contains__(self, expr): + if not isinstance(expr, m2_expr.ExprMem): + return self.symbols_id.__contains__(expr) + if not self.symbols_mem.__contains__(expr.arg): return False - return self.symbols_mem[a.arg][0].size == a.size - - def __getitem__(self, a): - if not isinstance(a, m2_expr.ExprMem): - return self.symbols_id.__getitem__(a) - if not a.arg in self.symbols_mem: - raise KeyError(a) - m = self.symbols_mem.__getitem__(a.arg) - if m[0].size != a.size: - raise KeyError(a) - return m[1] - - def __setitem__(self, a, v): - if not isinstance(a, m2_expr.ExprMem): - self.symbols_id.__setitem__(a, v) + return self.symbols_mem[expr.arg][0].size == expr.size + + def __getitem__(self, expr): + if not isinstance(expr, m2_expr.ExprMem): + return self.symbols_id.__getitem__(expr) + if not expr.arg in self.symbols_mem: + raise KeyError(expr) + mem, value = self.symbols_mem.__getitem__(expr.arg) + if mem.size != expr.size: + raise KeyError(expr) + return value + + def get(self, expr, default=None): + if not isinstance(expr, m2_expr.ExprMem): + return self.symbols_id.get(expr, default) + if not expr.arg in self.symbols_mem: + return default + mem, value = self.symbols_mem.__getitem__(expr.arg) + if mem.size != expr.size: + return default + return value + + def __setitem__(self, expr, value): + if not isinstance(expr, m2_expr.ExprMem): + self.symbols_id.__setitem__(expr, value) return - self.symbols_mem.__setitem__(a.arg, (a, v)) + assert expr.size == value.size + self.symbols_mem.__setitem__(expr.arg, (expr, value)) def __iter__(self): - for a in self.symbols_id: - yield a - for a in self.symbols_mem: - yield self.symbols_mem[a][0] - - def __delitem__(self, a): - if not isinstance(a, m2_expr.ExprMem): - self.symbols_id.__delitem__(a) + for expr in self.symbols_id: + yield expr + for expr in self.symbols_mem: + yield self.symbols_mem[expr][0] + + def __delitem__(self, expr): + if not isinstance(expr, m2_expr.ExprMem): + self.symbols_id.__delitem__(expr) else: - self.symbols_mem.__delitem__(a.arg) + self.symbols_mem.__delitem__(expr.arg) def items(self): - k = self.symbols_id.items() + [x for x in self.symbols_mem.values()] - return k + return self.symbols_id.items() + [x for x in self.symbols_mem.values()] def keys(self): - k = self.symbols_id.keys() + [x[0] for x in self.symbols_mem.values()] - return k + return (self.symbols_id.keys() + + [x[0] for x in self.symbols_mem.values()]) def copy(self): - p = symbols() - p.symbols_id = dict(self.symbols_id) - p.symbols_mem = dict(self.symbols_mem) - return p + new_symbols = symbols() + new_symbols.symbols_id = dict(self.symbols_id) + new_symbols.symbols_mem = dict(self.symbols_mem) + return new_symbols def inject_info(self, info): - s = symbols() - for k, v in self.items(): - k = expr_simp(k.replace_expr(info)) - v = expr_simp(v.replace_expr(info)) - s[k] = v - return s + new_symbols = symbols() + for expr, value in self.items(): + expr = expr_simp(expr.replace_expr(info)) + value = expr_simp(value.replace_expr(info)) + new_symbols[expr] = value + return new_symbols class symbexec(object): @@ -88,154 +102,154 @@ class symbexec(object): func_write=None, sb_expr_simp=expr_simp): self.symbols = symbols() - for k, v in known_symbols.items(): - self.symbols[k] = v + for expr, value in known_symbols.items(): + self.symbols[expr] = value self.func_read = func_read self.func_write = func_write self.ir_arch = ir_arch self.expr_simp = sb_expr_simp - def find_mem_by_addr(self, e): - if e in self.symbols.symbols_mem: - return self.symbols.symbols_mem[e][0] + def find_mem_by_addr(self, expr): + """ + Return memory keys with pointer equal to @expr + @expr: address of the searched memory variable + """ + if expr in self.symbols.symbols_mem: + return self.symbols.symbols_mem[expr][0] return None - def eval_ExprId(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - if isinstance(e.name, asmbloc.asm_label) and e.name.offset is not None: - return m2_expr.ExprInt_from(e, e.name.offset) - if not e in self.symbols: - # raise ValueError('unknown symbol %s'% e) - return e - return self.symbols[e] - - def eval_ExprInt(self, e, eval_cache=None): - return e - - def eval_ExprMem(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - a_val = self.expr_simp(self.eval_expr(e.arg, eval_cache)) - if a_val != e.arg: - a = self.expr_simp(m2_expr.ExprMem(a_val, size=e.size)) - else: - a = e - if a in self.symbols: - return self.symbols[a] - tmp = None - # test if mem lookup is known - if a_val in self.symbols.symbols_mem: - tmp = self.symbols.symbols_mem[a_val][0] - if tmp is None: - - v = self.find_mem_by_addr(a_val) - if not v: - out = [] - ov = self.get_mem_overlapping(a, eval_cache) - off_base = 0 - ov.sort() - # ov.reverse() - for off, x in ov: - # off_base = off * 8 - # x_size = self.symbols[x].size - if off >= 0: - m = min(a.size - off * 8, x.size) - ee = m2_expr.ExprSlice(self.symbols[x], 0, m) - ee = self.expr_simp(ee) - out.append((ee, off_base, off_base + m)) - off_base += m - else: - m = min(a.size - off * 8, x.size) - ee = m2_expr.ExprSlice(self.symbols[x], -off * 8, m) - ff = self.expr_simp(ee) - new_off_base = off_base + m + off * 8 - out.append((ff, off_base, new_off_base)) - off_base = new_off_base - if out: - missing_slice = self.rest_slice(out, 0, a.size) - for sa, sb in missing_slice: - ptr = self.expr_simp( - a_val + m2_expr.ExprInt_from(a_val, sa / 8) - ) - mm = m2_expr.ExprMem(ptr, size=sb - sa) - mm.is_term = True - mm.is_simp = True - out.append((mm, sa, sb)) - out.sort(key=lambda x: x[1]) - # for e, sa, sb in out: - # print str(e), sa, sb - ee = m2_expr.ExprSlice(m2_expr.ExprCompose(out), 0, a.size) - ee = self.expr_simp(ee) - return ee - if self.func_read and isinstance(a.arg, m2_expr.ExprInt): - return self.func_read(a) + def get_mem_state(self, expr): + """ + Evaluate the @expr memory in the current state using @cache + @expr: the memory key + """ + ptr, size = expr.arg, expr.size + ret = self.find_mem_by_addr(ptr) + if not ret: + out = [] + overlaps = self.get_mem_overlapping(expr) + off_base = 0 + for off, mem in overlaps: + if off >= 0: + new_size = min(size - off * 8, mem.size) + tmp = self.expr_simp(self.symbols[mem][0:new_size]) + out.append((tmp, off_base, off_base + new_size)) + off_base += new_size + else: + new_size = min(size - off * 8, mem.size) + tmp = self.expr_simp(self.symbols[mem][-off * 8:new_size]) + new_off_base = off_base + new_size + off * 8 + out.append((tmp, off_base, new_off_base)) + off_base = new_off_base + if out: + missing_slice = self.rest_slice(out, 0, size) + for slice_start, slice_stop in missing_slice: + ptr = self.expr_simp(ptr + m2_expr.ExprInt(slice_start / 8, ptr.size)) + mem = m2_expr.ExprMem(ptr, slice_stop - slice_start) + out.append((mem, slice_start, slice_stop)) + out.sort(key=lambda x: x[1]) + args = [expr for (expr, _, _) in out] + tmp = m2_expr.ExprSlice(m2_expr.ExprCompose(*args), 0, size) + tmp = self.expr_simp(tmp) + return tmp + + + if self.func_read and isinstance(ptr, m2_expr.ExprInt): + return self.func_read(expr) else: - # XXX hack test - a.is_term = True - return a + return expr # bigger lookup - if a.size > tmp.size: - rest = a.size - ptr = a_val + if size > ret.size: + rest = size + ptr = ptr out = [] ptr_index = 0 while rest: - v = self.find_mem_by_addr(ptr) - if v is None: - # raise ValueError("cannot find %s in mem"%str(ptr)) - val = m2_expr.ExprMem(ptr, 8) - v = val + mem = self.find_mem_by_addr(ptr) + if mem is None: + value = m2_expr.ExprMem(ptr, 8) + mem = value diff_size = 8 - elif rest >= v.size: - val = self.symbols[v] - diff_size = v.size + elif rest >= mem.size: + value = self.symbols[mem] + diff_size = mem.size else: diff_size = rest - val = self.symbols[v][0:diff_size] - val = (val, ptr_index, ptr_index + diff_size) - out.append(val) + value = self.symbols[mem][0:diff_size] + out.append((value, ptr_index, ptr_index + diff_size)) ptr_index += diff_size rest -= diff_size - ptr = self.expr_simp( - self.eval_expr( - m2_expr.ExprOp('+', ptr, - m2_expr.ExprInt_from(ptr, v.size / 8)), - eval_cache) - ) - e = self.expr_simp(m2_expr.ExprCompose(out)) - return e + ptr = self.expr_simp(ptr + m2_expr.ExprInt(mem.size / 8, ptr.size)) + out.sort(key=lambda x: x[1]) + args = [expr for (expr, _, _) in out] + ret = self.expr_simp(m2_expr.ExprCompose(*args)) + return ret # part lookup - tmp = self.expr_simp(m2_expr.ExprSlice(self.symbols[tmp], 0, a.size)) - return tmp - - def eval_expr_visit(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - # print 'visit', e, e.is_term - if e.is_term: - return e - if e in eval_cache: - return eval_cache[e] - c = e.__class__ - deal_class = {m2_expr.ExprId: self.eval_ExprId, - m2_expr.ExprInt: self.eval_ExprInt, - m2_expr.ExprMem: self.eval_ExprMem, - } - # print 'eval', e - if c in deal_class: - e = deal_class[c](e, eval_cache) - # print "ret", e - if not (isinstance(e, m2_expr.ExprId) or isinstance(e, - m2_expr.ExprInt)): - e.is_term = True - return e - - def eval_expr(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - r = e.visit(lambda x: self.eval_expr_visit(x, eval_cache)) - return r + ret = self.expr_simp(self.symbols[ret][:size]) + return ret + + + def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): + """ + Deep First evaluate nodes: + 1. evaluate node's sons + 2. simplify + """ + + #print '\t'*level, "Eval:", expr + if expr in cache: + ret = cache[expr] + #print "In cache!", ret + elif isinstance(expr, m2_expr.ExprInt): + return expr + elif isinstance(expr, m2_expr.ExprId): + if isinstance(expr.name, asmbloc.asm_label) and expr.name.offset is not None: + ret = m2_expr.ExprInt_from(expr, expr.name.offset) + else: + ret = state.get(expr, expr) + elif isinstance(expr, m2_expr.ExprMem): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = m2_expr.ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + assert expr.size == ret.size + elif isinstance(expr, m2_expr.ExprCond): + cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1) + src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1) + src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1) + ret = m2_expr.ExprCond(cond, src1, src2) + elif isinstance(expr, m2_expr.ExprSlice): + arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = m2_expr.ExprSlice(arg, expr.start, expr.stop) + elif isinstance(expr, m2_expr.ExprOp): + args = [] + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1) + assert oarg.size == arg.size + args.append(arg) + ret = m2_expr.ExprOp(expr.op, *args) + elif isinstance(expr, m2_expr.ExprCompose): + args = [] + for arg in expr.args: + args.append(self.apply_expr_on_state_visit_cache(arg, state, cache, level+1)) + ret = m2_expr.ExprCompose(*args) + else: + raise TypeError("Unknown expr type") + #print '\t'*level, "Result", ret + ret = self.expr_simp(ret) + #print '\t'*level, "Result simpl", ret + + assert expr.size == ret.size + cache[expr] = ret + return ret + + def apply_expr_on_state(self, expr, cache): + if cache is None: + cache = {} + ret = self.apply_expr_on_state_visit_cache(expr, self.symbols, cache) + return ret + + def eval_expr(self, expr, eval_cache=None): + return self.apply_expr_on_state(expr, eval_cache) def modified_regs(self, init_state=None): if init_state is None: @@ -250,121 +264,111 @@ class symbexec(object): yield i def modified_mems(self, init_state=None): + if init_state is None: + init_state = self.ir_arch.arch.regs.regs_init mems = self.symbols.symbols_mem.values() mems.sort() - for m, _ in mems: - yield m + for mem, _ in mems: + if mem in init_state and \ + mem in self.symbols.symbols_mem and \ + self.symbols.symbols_mem[mem] == init_state[mem]: + continue + yield mem def modified(self, init_state=None): - for r in self.modified_regs(init_state): - yield r - for m in self.modified_mems(init_state): - yield m + for reg in self.modified_regs(init_state): + yield reg + for mem in self.modified_mems(init_state): + yield mem def dump_id(self): + """ + Dump modififed registers symbols only + """ ids = self.symbols.symbols_id.keys() ids.sort() - for i in ids: - if i in self.ir_arch.arch.regs.regs_init and \ - i in self.symbols.symbols_id and \ - self.symbols.symbols_id[i] == self.ir_arch.arch.regs.regs_init[i]: + for expr in ids: + if (expr in self.ir_arch.arch.regs.regs_init and + expr in self.symbols.symbols_id and + self.symbols.symbols_id[expr] == self.ir_arch.arch.regs.regs_init[expr]): continue - print i, self.symbols.symbols_id[i] + print expr, "=", self.symbols.symbols_id[expr] def dump_mem(self): + """ + Dump modififed memory symbols + """ mems = self.symbols.symbols_mem.values() mems.sort() - for m, v in mems: - print m, v + for mem, value in mems: + print mem, value def rest_slice(self, slices, start, stop): - o = [] + """ + Return the complementary slices of @slices in the range @start, @stop + @slices: base slices + @start, @stop: interval range + """ + out = [] last = start - for _, a, b in slices: - if a == last: - last = b + for _, slice_start, slice_stop in slices: + if slice_start == last: + last = slice_stop continue - o.append((last, a)) - last = b + out.append((last, slice_start)) + last = slice_stop if last != stop: - o.append((b, stop)) - return o - - def substract_mems(self, a, b): - ex = b.arg - a.arg - ex = self.expr_simp(self.eval_expr(ex, {})) - if not isinstance(ex, m2_expr.ExprInt): - return None - ptr_diff = int(int32(ex.arg)) - out = [] - if ptr_diff < 0: - # [a ] - #[b ]XXX - sub_size = b.size + ptr_diff * 8 - if sub_size >= a.size: - pass - else: - ex = m2_expr.ExprOp('+', a.arg, - m2_expr.ExprInt_from(a.arg, sub_size / 8)) - ex = self.expr_simp(self.eval_expr(ex, {})) + out.append((slice_stop, stop)) + return out - rest_ptr = ex - rest_size = a.size - sub_size + def substract_mems(self, arg1, arg2): + """ + Return the remaining memory areas of @arg1 - @arg2 + @arg1, @arg2: ExprMem + """ - val = self.symbols[a][sub_size:a.size] - out = [(m2_expr.ExprMem(rest_ptr, rest_size), val)] - else: - #[a ] - # XXXX[b ]YY + ptr_diff = self.expr_simp(arg2.arg - arg1.arg) + ptr_diff = int(int32(ptr_diff.arg)) - #[a ] - # XXXX[b ] + zone1 = interval([(0, arg1.size/8-1)]) + zone2 = interval([(ptr_diff, ptr_diff + arg2.size/8-1)]) + zones = zone1 - zone2 + + out = [] + for start, stop in zones: + ptr = arg1.arg + m2_expr.ExprInt(start, arg1.arg.size) + ptr = self.expr_simp(ptr) + value = self.expr_simp(self.symbols[arg1][start*8:(stop+1)*8]) + mem = m2_expr.ExprMem(ptr, (stop - start + 1)*8) + assert mem.size == value.size + out.append((mem, value)) - out = [] - # part X - if ptr_diff > 0: - val = self.symbols[a][0:ptr_diff * 8] - out.append((m2_expr.ExprMem(a.arg, ptr_diff * 8), val)) - # part Y - if ptr_diff * 8 + b.size < a.size: - - ex = m2_expr.ExprOp('+', b.arg, - m2_expr.ExprInt_from(b.arg, b.size / 8)) - ex = self.expr_simp(self.eval_expr(ex, {})) - - rest_ptr = ex - rest_size = a.size - (ptr_diff * 8 + b.size) - val = self.symbols[a][ptr_diff * 8 + b.size:a.size] - out.append((m2_expr.ExprMem(ex, val.size), val)) return out - # give mem stored overlapping requested mem ptr - def get_mem_overlapping(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - if not isinstance(e, m2_expr.ExprMem): - raise ValueError('mem overlap bad arg') - ov = [] - # suppose max mem size is 64 bytes, compute all reachable addresses - to_test = [] - base_ptr = self.expr_simp(e.arg) - for i in xrange(-7, e.size / 8): - ex = self.expr_simp( - self.eval_expr(base_ptr + m2_expr.ExprInt_from(e.arg, i), - eval_cache)) - to_test.append((i, ex)) - - for i, x in to_test: - if not x in self.symbols.symbols_mem: + + def get_mem_overlapping(self, expr): + """ + Gives mem stored overlapping memory in @expr + Hypothesis: Max mem size is 64 bytes, compute all reachable addresses + @expr: target memory + """ + + overlaps = [] + base_ptr = self.expr_simp(expr.arg) + for i in xrange(-7, expr.size / 8): + new_ptr = base_ptr + m2_expr.ExprInt(i, expr.arg.size) + new_ptr = self.expr_simp(new_ptr) + + mem, origin = self.symbols.symbols_mem.get(new_ptr, (None, None)) + if mem is None: continue - ex = self.expr_simp(self.eval_expr(e.arg - x, eval_cache)) - if not isinstance(ex, m2_expr.ExprInt): - raise ValueError('ex is not ExprInt') - ptr_diff = int32(ex.arg) - if ptr_diff >= self.symbols.symbols_mem[x][1].size / 8: - # print "too long!" + + ptr_diff = -i + if ptr_diff >= origin.size / 8: + # access is too small to overlap the memory target continue - ov.append((i, self.symbols.symbols_mem[x][0])) - return ov + overlaps.append((i, mem)) + + return overlaps def eval_ir_expr(self, assignblk): """ @@ -372,16 +376,13 @@ class symbexec(object): @assignblk: AssignBlock instance """ pool_out = {} - - eval_cache = dict(self.symbols.items()) - + eval_cache = {} for dst, src in assignblk.iteritems(): src = self.eval_expr(src, eval_cache) if isinstance(dst, m2_expr.ExprMem): - a = self.eval_expr(dst.arg, eval_cache) - a = self.expr_simp(a) + ptr = self.eval_expr(dst.arg, eval_cache) # test if mem lookup is known - tmp = m2_expr.ExprMem(a, dst.size) + tmp = m2_expr.ExprMem(ptr, dst.size) pool_out[tmp] = src elif isinstance(dst, m2_expr.ExprId): @@ -398,18 +399,18 @@ class symbexec(object): """ mem_dst = [] src_dst = self.eval_ir_expr(assignblk) - eval_cache = dict(self.symbols.items()) for dst, src in src_dst: if isinstance(dst, m2_expr.ExprMem): - mem_overlap = self.get_mem_overlapping(dst, eval_cache) + mem_overlap = self.get_mem_overlapping(dst) for _, base in mem_overlap: diff_mem = self.substract_mems(base, dst) del self.symbols[base] for new_mem, new_val in diff_mem: - new_val.is_term = True self.symbols[new_mem] = new_val src_o = self.expr_simp(src) self.symbols[dst] = src_o + if dst == src_o: + del self.symbols[dst] if isinstance(dst, m2_expr.ExprMem): if self.func_write and isinstance(dst.arg, m2_expr.ExprInt): self.func_write(self, dst, src_o) @@ -424,51 +425,60 @@ class symbexec(object): @step: display intermediate steps """ for assignblk in irb.irs: - self.eval_ir(assignblk) if step: + print 'Assignblk:' + print assignblk print '_' * 80 + self.eval_ir(assignblk) + if step: self.dump_id() - eval_cache = dict(self.symbols.items()) - return self.eval_expr(self.ir_arch.IRDst, eval_cache) + self.dump_mem() + print '_' * 80 + return self.eval_expr(self.ir_arch.IRDst) + + def emul_ir_bloc(self, myir, addr, step=False): + warnings.warn('DEPRECATION WARNING: use "emul_ir_block(self, addr, step=False)" instead of emul_ir_bloc') + return self.emul_ir_block(addr, step) + + def emul_ir_block(self, addr, step=False): + irblock = self.ir_arch.get_bloc(addr) + if irblock is not None: + addr = self.emulbloc(irblock, step=step) + return addr - def emul_ir_bloc(self, myir, ad, step=False): - b = myir.get_bloc(ad) - if b is not None: - ad = self.emulbloc(b, step=step) - return ad + def emul_ir_blocs(self, myir, addr, lbl_stop=None, step=False): + warnings.warn('DEPRECATION WARNING: use "emul_ir_blocks(self, addr, lbl_stop=None, step=False):" instead of emul_ir_blocs') + return self.emul_ir_blocks(addr, lbl_stop, step) - def emul_ir_blocs(self, myir, ad, lbl_stop=None, step=False): + def emul_ir_blocks(self, addr, lbl_stop=None, step=False): while True: - b = myir.get_bloc(ad) - if b is None: + irblock = self.ir_arch.get_bloc(addr) + if irblock is None: break - if b.label == lbl_stop: + if irblock.label == lbl_stop: break - ad = self.emulbloc(b, step=step) - return ad - - def del_mem_above_stack(self, sp): - sp_val = self.symbols[sp] - for mem_ad, (mem, _) in self.symbols.symbols_mem.items(): - # print mem_ad, sp_val - diff = self.eval_expr(mem_ad - sp_val, {}) - diff = expr_simp(diff) + addr = self.emulbloc(irblock, step=step) + return addr + + def del_mem_above_stack(self, stack_ptr): + stack_ptr = self.eval_expr(stack_ptr) + for mem_addr, (mem, _) in self.symbols.symbols_mem.items(): + diff = self.expr_simp(mem_addr - stack_ptr) if not isinstance(diff, m2_expr.ExprInt): continue - m = expr_simp(diff.msb()) - if m.arg == 1: + sign_bit = self.expr_simp(diff.msb()) + if sign_bit.arg == 1: del self.symbols[mem] def apply_expr(self, expr): """Evaluate @expr and apply side effect if needed (ie. if expr is an assignment). Return the evaluated value""" - # Eval expression - to_eval = expr.src if isinstance(expr, m2_expr.ExprAff) else expr - ret = self.expr_simp(self.eval_expr(to_eval)) - # Update value if needed if isinstance(expr, m2_expr.ExprAff): - self.eval_ir(AssignBlock([m2_expr.ExprAff(expr.dst, ret)])) + ret = self.eval_expr(expr.src) + self.eval_ir(AssignBlock([expr])) + else: + ret = self.eval_expr(expr) return ret diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 340fbfec..57859f9c 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -145,11 +145,11 @@ class TranslatorC(Translator): out = [] # XXX check mask for 64 bit & 32 bit compat dst_cast = "uint%d_t" % expr.size - for x in expr.args: + for index, arg in expr.iter_args(): out.append("(((%s)(%s & 0x%X)) << %d)" % (dst_cast, - self.from_expr(x[0]), - (1 << (x[2] - x[1])) - 1, - x[1])) + self.from_expr(arg), + (1 << arg.size) - 1, + index)) out = ' | '.join(out) return '(' + out + ')' diff --git a/miasm2/ir/translators/miasm.py b/miasm2/ir/translators/miasm.py index ef91cfb1..b390eb51 100644 --- a/miasm2/ir/translators/miasm.py +++ b/miasm2/ir/translators/miasm.py @@ -27,9 +27,8 @@ class TranslatorMiasm(Translator): ", ".join(map(self.from_expr, expr.args))) def from_ExprCompose(self, expr): - args = ["(%s, %d, %d)" % (self.from_expr(arg), start, stop) - for arg, start, stop in expr.args] - return "ExprCompose([%s])" % ", ".join(args) + args = ["%s" % self.from_expr(arg) for arg in expr.args] + return "ExprCompose(%s)" % ", ".join(args) def from_ExprAff(self, expr): return "ExprAff(%s, %s)" % (self.from_expr(expr.dst), diff --git a/miasm2/ir/translators/python.py b/miasm2/ir/translators/python.py index f745d2df..c06d865c 100644 --- a/miasm2/ir/translators/python.py +++ b/miasm2/ir/translators/python.py @@ -31,10 +31,10 @@ class TranslatorPython(Translator): def from_ExprCompose(self, expr): out = [] - for subexpr, start, stop in expr.args: - out.append("((%s & 0x%x) << %d)" % (self.from_expr(subexpr), - (1 << (stop - start)) - 1, - start)) + for index, arg in expr.iter_args(): + out.append("((%s & 0x%x) << %d)" % (self.from_expr(arg), + (1 << arg.size) - 1, + index)) return "(%s)" % ' | '.join(out) def from_ExprCond(self, expr): diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py index 5bffd7f2..5d5fb26b 100644 --- a/miasm2/ir/translators/smt2.py +++ b/miasm2/ir/translators/smt2.py @@ -163,10 +163,8 @@ class TranslatorSMT2(Translator): def from_ExprCompose(self, expr): res = None - args = sorted(expr.args, key=operator.itemgetter(2)) # sort by start off - for subexpr, start, stop in args: - sube = self.from_expr(subexpr) - e = bv_extract(stop-start-1, 0, sube) + for arg in expr.args: + e = bv_extract(arg.size-1, 0, self.from_expr(arg)) if res: res = bv_concat(e, res) else: diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index e0460cc4..ccb14b4f 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -137,10 +137,8 @@ class TranslatorZ3(Translator): def from_ExprCompose(self, expr): res = None - args = sorted(expr.args, key=operator.itemgetter(2)) # sort by start off - for subexpr, start, stop in args: - sube = self.from_expr(subexpr) - e = z3.Extract(stop-start-1, 0, sube) + for arg in expr.args: + e = z3.Extract(arg.size-1, 0, self.from_expr(arg)) if res != None: res = z3.Concat(e, res) else: diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index feef7372..8fc609fb 100644 --- a/test/arch/arm/sem.py +++ b/test/arch/arm/sem.py @@ -29,7 +29,7 @@ def compute(asm, inputstate={}, debug=False): instr = mn.dis(code, "l") instr.offset = inputstate.get(PC, 0) interm.add_instr(instr) - symexec.emul_ir_blocs(interm, instr.offset) + symexec.emul_ir_blocks(instr.offset) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: @@ -285,7 +285,7 @@ class TestARMSemantic(unittest.TestCase): self.assertEqual(compute('AND R4, R4, R5 LSR 2 ', {R4: 0xFFFFFFFF, R5: 0x80000041, }), {R4: 0x20000010, R5: 0x80000041, }) self.assertEqual(compute('AND R4, R4, R5 ASR 3 ', {R4: 0xF00000FF, R5: 0x80000081, }), {R4: 0xF0000010, R5: 0x80000081, }) self.assertEqual(compute('AND R4, R4, R5 ROR 4 ', {R4: 0xFFFFFFFF, R5: 0x000000FF, }), {R4: 0xF000000F, R5: 0x000000FF, }) - self.assertEqual(compute('AND R4, R4, R5 RRX ', {R4: 0xFFFFFFFF, R5: 0x00000101, }), {R4: ExprCompose([(ExprInt(0x80, 31),0,31), (cf_init,31,32)]), R5: 0x00000101, }) + self.assertEqual(compute('AND R4, R4, R5 RRX ', {R4: 0xFFFFFFFF, R5: 0x00000101, }), {R4: ExprCompose(ExprInt(0x80, 31), cf_init), R5: 0x00000101, }) # §A8.8.15: AND{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm>, <type> <Rs> self.assertEqual(compute('AND R4, R6, R4 LSL R5', {R4: 0x00000001, R5: 0x00000004, R6: -1, }), {R4: 0x00000010, R5: 0x00000004, R6: 0xFFFFFFFF, }) diff --git a/test/arch/msp430/sem.py b/test/arch/msp430/sem.py index 2488d633..515b4c53 100644 --- a/test/arch/msp430/sem.py +++ b/test/arch/msp430/sem.py @@ -27,7 +27,7 @@ def compute(asm, inputstate={}, debug=False): instr = mn.dis(code, mode) instr.offset = inputstate.get(PC, 0) interm.add_instr(instr) - symexec.emul_ir_blocs(interm, instr.offset) + symexec.emul_ir_blocks(instr.offset) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index 617b929b..7cf81828 100644 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -26,7 +26,7 @@ def symb_exec(interm, inputstate, debug): sympool = dict(regs_init) sympool.update(inputstate) symexec = symbexec(interm, sympool) - symexec.emul_ir_blocs(interm, 0) + symexec.emul_ir_blocks(0) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/expression/expression.py b/test/expression/expression.py index 90236744..847ba7eb 100644 --- a/test/expression/expression.py +++ b/test/expression/expression.py @@ -30,10 +30,10 @@ for expr in [ A + cst1, A + ExprCond(cond1, cst1, cst2), ExprCond(cond1, cst1, cst2) + ExprCond(cond2, cst3, cst4), - ExprCompose([(A, 0, 32), (cst1, 32, 64)]), - ExprCompose([(ExprCond(cond1, cst1, cst2), 0, 32), (A, 32, 64)]), - ExprCompose([(ExprCond(cond1, cst1, cst2), 0, 32), - (ExprCond(cond2, cst3, cst4), 32, 64)]), + ExprCompose(A, cst1), + ExprCompose(ExprCond(cond1, cst1, cst2), A), + ExprCompose(ExprCond(cond1, cst1, cst2), + ExprCond(cond2, cst3, cst4)), ExprCond(ExprCond(cond1, cst1, cst2), cst3, cst4), ]: print "*" * 80 diff --git a/test/expression/expression_helper.py b/test/expression/expression_helper.py index 514a9a51..a3a8fba4 100644 --- a/test/expression/expression_helper.py +++ b/test/expression/expression_helper.py @@ -16,11 +16,10 @@ class TestExpressionExpressionHelper(unittest.TestCase): ebx = m2_expr.ExprId("EBX") ax = eax[0:16] expr = eax + ebx - expr = m2_expr.ExprCompose([(ax, 0, 16), (expr[16:32], 16, 32)]) + expr = m2_expr.ExprCompose(ax, expr[16:32]) expr2 = m2_expr.ExprMem((eax + ebx) ^ (eax), size=16) expr2 = expr2 | ax | expr2 | cst - exprf = expr - expr + m2_expr.ExprCompose([(expr2, 0, 16), - (cst, 16, 32)]) + exprf = expr - expr + m2_expr.ExprCompose(expr2, cst) # Identify variables vi = Variables_Identifier(exprf) diff --git a/test/expression/simplifications.py b/test/expression/simplifications.py index 99cc7c35..bf658a30 100644 --- a/test/expression/simplifications.py +++ b/test/expression/simplifications.py @@ -24,11 +24,9 @@ i2 = ExprInt(uint32(0x2)) icustom = ExprInt(uint32(0x12345678)) cc = ExprCond(a, b, c) -o = ExprCompose([(a[:8], 8, 16), - (a[8:16], 0, 8)]) +o = ExprCompose(a[8:16], a[:8]) -o2 = ExprCompose([(a[8:16], 0, 8), - (a[:8], 8, 16)]) +o2 = ExprCompose(a[8:16], a[:8]) l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] l2 = l[::-1] @@ -93,11 +91,11 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), (a[8:16][:8], a[8:16]), (a[8:32][:8], a[8:16]), (a[:16][8:16], a[8:16]), - (ExprCompose([(a, 0, 32)]), a), - (ExprCompose([(a[:16], 0, 16)]), a[:16]), - (ExprCompose([(a[:16], 0, 16), (a[:16], 16, 32)]), - ExprCompose([(a[:16], 0, 16), (a[:16], 16, 32)]),), - (ExprCompose([(a[:16], 0, 16), (a[16:32], 16, 32)]), a), + (ExprCompose(a), a), + (ExprCompose(a[:16]), a[:16]), + (ExprCompose(a[:16], a[:16]), + ExprCompose(a[:16], a[:16]),), + (ExprCompose(a[:16], a[16:32]), a), (ExprMem(a)[:32], ExprMem(a)), (ExprMem(a)[:16], ExprMem(a, size=16)), @@ -106,14 +104,12 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), (ExprCond(ExprInt32(0), b, a), a), (ExprInt32(0x80000000)[31:32], ExprInt1(1)), - (ExprCompose([ - (ExprInt16(0x1337)[ - :8], 0, 8), (ExprInt16(0x1337)[8:16], 8, 16)]), + (ExprCompose(ExprInt16(0x1337)[:8], ExprInt16(0x1337)[8:16]), ExprInt16(0x1337)), - (ExprCompose([(ExprInt32(0x1337beef)[8:16], 8, 16), - (ExprInt32(0x1337beef)[:8], 0, 8), - (ExprInt32(0x1337beef)[16:32], 16, 32)]), + (ExprCompose(ExprInt32(0x1337beef)[:8], + ExprInt32(0x1337beef)[8:16], + ExprInt32(0x1337beef)[16:32]), ExprInt32(0x1337BEEF)), (ExprCond(a, ExprCond(a, @@ -122,9 +118,9 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), d), ExprCond(a, b, d)), ((a & b & ExprInt32(0x12))[31:32], ExprInt1(0)), - (ExprCompose([ - (ExprCond(a, ExprInt16(0x10), ExprInt16(0x20)), 0, 16), - (ExprInt16(0x1337), 16, 32)]), + (ExprCompose( + ExprCond(a, ExprInt16(0x10), ExprInt16(0x20)), + ExprInt16(0x1337)), ExprCond(a, ExprInt32(0x13370010), ExprInt32(0x13370020))), (ExprCond(ExprCond(a, ExprInt1(0), ExprInt1(1)), b, c), ExprCond(a, c, b)), @@ -167,103 +163,99 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), (ExprOp('-', ExprInt8(1), ExprInt8(0)), ExprInt8(1)), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x20), - ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x10), - ExprCompose([(ExprInt16(0), 0, 16), (a, 16, 48), (ExprInt16(0), 48, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x30), - ExprCompose([(ExprInt(0, 48), 0, 48), (a[:0x10], 48, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x11), - ExprCompose([(ExprInt(0, 0x11), 0, 0x11), (a, 0x11, 0x31), (ExprInt(0, 0xF), 0x31, 0x40)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x40), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x20), + ExprCompose(ExprInt32(0), a)), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x10), + ExprCompose(ExprInt16(0), a, ExprInt16(0))), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x30), + ExprCompose(ExprInt(0, 48), a[:0x10])), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x11), + ExprCompose(ExprInt(0, 0x11), a, ExprInt(0, 0xF))), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x40), ExprInt64(0)), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x50), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x50), ExprInt64(0)), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x20), - ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x10), - ExprCompose([(ExprInt16(0), 0, 16), (a, 16, 48), (ExprInt16(0), 48, 64)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x30), - ExprCompose([(a[0x10:], 0, 16), (ExprInt(0, 48), 16, 64)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x11), - ExprCompose([(ExprInt(0, 0xf), 0, 0xf), (a, 0xf, 0x2f), (ExprInt(0, 0x11), 0x2f, 0x40)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x40), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x20), + ExprCompose(a, ExprInt32(0))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x10), + ExprCompose(ExprInt16(0), a, ExprInt16(0))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x30), + ExprCompose(a[0x10:], ExprInt(0, 48))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x11), + ExprCompose(ExprInt(0, 0xf), a, ExprInt(0, 0x11))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x40), ExprInt64(0)), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x50), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x50), ExprInt64(0)), - (ExprCompose([(a, 0, 32), (b, 32, 64)]) << ExprInt64(0x20), - ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)])), - (ExprCompose([(a, 0, 32), (b, 32, 64)]) << ExprInt64(0x10), - ExprCompose([(ExprInt16(0), 0, 16), (a, 16, 48), (b[:16], 48, 64)])), + (ExprCompose(a, b) << ExprInt64(0x20), + ExprCompose(ExprInt32(0), a)), + (ExprCompose(a, b) << ExprInt64(0x10), + ExprCompose(ExprInt16(0), a, b[:16])), - (ExprCompose([(a, 0, 32), (b, 32, 64)]) | ExprCompose([(c, 0, 32), (d, 32, 64)]), - ExprCompose([(a|c, 0, 32), (b|d, 32, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) | ExprCompose([(ExprInt32(0), 0, 32), (d, 32, 64)]), - ExprCompose([(a, 0, 32), (d, 32, 64)])), - (ExprCompose([(f[:32], 0, 32), (ExprInt32(0), 32, 64)]) | ExprCompose([(ExprInt32(0), 0, 32), (f[32:], 32, 64)]), + (ExprCompose(a, b) | ExprCompose(c, d), + ExprCompose(a|c, b|d)), + (ExprCompose(a, ExprInt32(0)) | ExprCompose(ExprInt32(0), d), + ExprCompose(a, d)), + (ExprCompose(f[:32], ExprInt32(0)) | ExprCompose(ExprInt32(0), f[32:]), f), - ((ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) * ExprInt64(0x123))[32:64], - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) * ExprInt64(0x123))[32:64]), + ((ExprCompose(a, ExprInt32(0)) * ExprInt64(0x123))[32:64], + (ExprCompose(a, ExprInt32(0)) * ExprInt64(0x123))[32:64]), (ExprInt32(0x12), ExprInt32(0x12L)), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[:16], + (ExprCompose(a, b, c)[:16], a[:16]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[16:32], + (ExprCompose(a, b, c)[16:32], a[16:]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[32:48], + (ExprCompose(a, b, c)[32:48], b[:16]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[48:64], + (ExprCompose(a, b, c)[48:64], b[16:]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[64:80], + (ExprCompose(a, b, c)[64:80], c[:16]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[80:], + (ExprCompose(a, b, c)[80:], c[16:]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[80:82], + (ExprCompose(a, b, c)[80:82], c[16:18]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[16:48], - ExprCompose(((a[16:], 0, 16), (b[:16], 16, 32)))), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[48:80], - ExprCompose(((b[16:], 0, 16), (c[:16], 16, 32)))), - - (ExprCompose(((a[0:8], 0, 8), - (b[8:16], 8, 16), - (ExprInt(uint48(0x0L)), 16, 64)))[12:32], - ExprCompose(((b[12:16], 0, 4), (ExprInt(uint16(0)), 4, 20))) + (ExprCompose(a, b, c)[16:48], + ExprCompose(a[16:], b[:16])), + (ExprCompose(a, b, c)[48:80], + ExprCompose(b[16:], c[:16])), + + (ExprCompose(a[0:8], b[8:16], ExprInt(uint48(0x0L)))[12:32], + ExprCompose(b[12:16], ExprInt(uint16(0))) ), - (ExprCompose(((ExprCompose(((a[:8], 0, 8), - (ExprInt(uint56(0x0L)), 8, 64)))[8:32] - & - ExprInt(uint24(0x1L)), 0, 24), - (ExprInt(uint40(0x0L)), 24, 64))), + (ExprCompose(ExprCompose(a[:8], ExprInt(uint56(0x0L)))[8:32] + & + ExprInt(uint24(0x1L)), + ExprInt(uint40(0x0L))), ExprInt64(0)), - (ExprCompose(((ExprCompose(((a[:8], 0, 8), - (ExprInt(uint56(0x0L)), 8, 64)))[:8] - & - ExprInt(uint8(0x1L)), 0, 8), - (ExprInt(uint56(0x0L)), 8, 64))), - ExprCompose(((a[:8]&ExprInt8(1), 0, 8), (ExprInt(uint56(0)), 8, 64)))), - - (ExprCompose(((ExprCompose(((a[:8], 0, 8), - (ExprInt(uint56(0x0L)), 8, 64)))[:32] - & - ExprInt(uint32(0x1L)), 0, 32), - (ExprInt(uint32(0x0L)), 32, 64))), - ExprCompose(((ExprCompose(((ExprSlice(a, 0, 8), 0, 8), - (ExprInt(uint24(0x0L)), 8, 32))) - & - ExprInt(uint32(0x1L)), 0, 32), - (ExprInt(uint32(0x0L)), 32, 64))) + (ExprCompose(ExprCompose(a[:8], ExprInt(uint56(0x0L)))[:8] + & + ExprInt(uint8(0x1L)), + (ExprInt(uint56(0x0L)))), + ExprCompose(a[:8]&ExprInt8(1), ExprInt(uint56(0)))), + + (ExprCompose(ExprCompose(a[:8], + ExprInt(uint56(0x0L)))[:32] + & + ExprInt(uint32(0x1L)), + ExprInt(uint32(0x0L))), + ExprCompose(ExprCompose(ExprSlice(a, 0, 8), + ExprInt(uint24(0x0L))) + & + ExprInt(uint32(0x1L)), + ExprInt(uint32(0x0L))) ), - (ExprCompose([(a[:16], 0, 16), (b[:16], 16, 32)])[8:32], - ExprCompose([(a[8:16], 0, 8), (b[:16], 8, 24)])), + (ExprCompose(a[:16], b[:16])[8:32], + ExprCompose(a[8:16], b[:16])), ((a >> ExprInt32(16))[:16], a[16:32]), ((a >> ExprInt32(16))[8:16], @@ -410,10 +402,10 @@ match_tests = [ (MatchExpr(ExprCond(x, y, z), ExprCond(a, b, c), [a, b, c]), {a: x, b: y, c: z}), - (MatchExpr(ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)]), a, [a]), - {a: ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)])}), - (MatchExpr(ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)]), - ExprCompose([(a[:8], 0, 8), (b[:8], 8, 16)]), [a, b]), + (MatchExpr(ExprCompose(x[:8], y[:8]), a, [a]), + {a: ExprCompose(x[:8], y[:8])}), + (MatchExpr(ExprCompose(x[:8], y[:8]), + ExprCompose(a[:8], b[:8]), [a, b]), {a: x, b: y}), (MatchExpr(e1, e2, [b]), {b: ExprInt32(0x10)}), (MatchExpr(e3, diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py index 9165fccb..6df0bbc3 100644 --- a/test/ir/symbexec.py +++ b/test/ir/symbexec.py @@ -21,7 +21,7 @@ class TestSymbExec(unittest.TestCase): addr40 = ExprInt32(40) addr50 = ExprInt32(50) mem0 = ExprMem(addr0) - mem1 = ExprMem(addr1) + mem1 = ExprMem(addr1, 8) mem8 = ExprMem(addr8) mem9 = ExprMem(addr9) mem20 = ExprMem(addr20) @@ -34,22 +34,24 @@ class TestSymbExec(unittest.TestCase): id_a = ExprId('a') id_eax = ExprId('eax_init') - e = symbexec( - ir_x86_32(), {mem0: id_x, mem1: id_y, mem9: id_x, mem40w: id_x, mem50v: id_y, id_a: addr0, id_eax: addr0}) + e = symbexec(ir_x86_32(), + {mem0: id_x, mem1: id_y, mem9: id_x, + mem40w: id_x[:16], mem50v: id_y, + id_a: addr0, id_eax: addr0}) self.assertEqual(e.find_mem_by_addr(addr0), mem0) self.assertEqual(e.find_mem_by_addr(addrX), None) - self.assertEqual(e.eval_ExprMem(ExprMem(addr1 - addr1)), id_x) - self.assertEqual(e.eval_ExprMem(ExprMem(addr1, 8)), id_y) - self.assertEqual(e.eval_ExprMem(ExprMem(addr1 + addr1)), ExprCompose( - [(id_x[16:32], 0, 16), (ExprMem(ExprInt32(4), 16), 16, 32)])) - self.assertEqual(e.eval_ExprMem(mem8), ExprCompose( - [(id_x[0:24], 0, 24), (ExprMem(ExprInt32(11), 8), 24, 32)])) - self.assertEqual(e.eval_ExprMem(mem40v), id_x[:8]) - self.assertEqual(e.eval_ExprMem(mem50w), ExprCompose( - [(id_y, 0, 8), (ExprMem(ExprInt32(51), 8), 8, 16)])) - self.assertEqual(e.eval_ExprMem(mem20), mem20) + self.assertEqual(e.eval_expr(ExprMem(addr1 - addr1)), id_x) + self.assertEqual(e.eval_expr(ExprMem(addr1, 8)), id_y) + self.assertEqual(e.eval_expr(ExprMem(addr1 + addr1)), ExprCompose( + id_x[16:32], ExprMem(ExprInt32(4), 16))) + self.assertEqual(e.eval_expr(mem8), ExprCompose( + id_x[0:24], ExprMem(ExprInt32(11), 8))) + self.assertEqual(e.eval_expr(mem40v), id_x[:8]) + self.assertEqual(e.eval_expr(mem50w), ExprCompose( + id_y, ExprMem(ExprInt32(51), 8))) + self.assertEqual(e.eval_expr(mem20), mem20) e.func_read = lambda x: x - self.assertEqual(e.eval_ExprMem(mem20), mem20) + self.assertEqual(e.eval_expr(mem20), mem20) self.assertEqual(set(e.modified()), set(e.symbols)) self.assertRaises( KeyError, e.symbols.__getitem__, ExprMem(ExprInt32(100))) diff --git a/test/ir/translators/z3_ir.py b/test/ir/translators/z3_ir.py index e080c7f5..5fcfe25e 100644 --- a/test/ir/translators/z3_ir.py +++ b/test/ir/translators/z3_ir.py @@ -114,7 +114,7 @@ check_interp(model[memb.get_mem_array(32)], [(0xdeadbeef, 0), (0xdeadbeef + 3, 2)]) # -------------------------------------------------------------------------- -e5 = ExprSlice(ExprCompose(((e, 0, 32), (four, 32, 64))), 0, 32) * five +e5 = ExprSlice(ExprCompose(e, four), 0, 32) * five ez3 = Translator.to_language('z3').from_expr(e5) z3_e5 = z3.Extract(31, 0, z3.Concat(z3_four, z3_e)) * z3_five |