diff options
| author | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2016-02-25 14:14:07 +0100 |
|---|---|---|
| committer | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2016-02-26 15:53:53 +0100 |
| commit | 35ead949d8c0d9788ddc602d9dc206f416adcd7b (patch) | |
| tree | c96acb1e57e04c4bfca4901f4fdd349e211e0acc | |
| parent | 55cc4ec75424a841748b62e230a1abb01b258014 (diff) | |
| download | miasm-35ead949d8c0d9788ddc602d9dc206f416adcd7b.tar.gz miasm-35ead949d8c0d9788ddc602d9dc206f416adcd7b.zip | |
AssignBlock
| -rw-r--r-- | example/expression/get_read_write.py | 2 | ||||
| -rw-r--r-- | miasm2/analysis/data_analysis.py | 26 | ||||
| -rw-r--r-- | miasm2/analysis/depgraph.py | 10 | ||||
| -rw-r--r-- | miasm2/arch/aarch64/ira.py | 22 | ||||
| -rw-r--r-- | miasm2/arch/aarch64/sem.py | 30 | ||||
| -rw-r--r-- | miasm2/arch/arm/ira.py | 63 | ||||
| -rw-r--r-- | miasm2/arch/mips32/ira.py | 30 | ||||
| -rw-r--r-- | miasm2/arch/msp430/ira.py | 24 | ||||
| -rw-r--r-- | miasm2/arch/x86/ira.py | 38 | ||||
| -rw-r--r-- | miasm2/arch/x86/sem.py | 41 | ||||
| -rw-r--r-- | miasm2/expression/expression.py | 3 | ||||
| -rw-r--r-- | miasm2/ir/analysis.py | 62 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 354 | ||||
| -rw-r--r-- | miasm2/ir/ir2C.py | 43 | ||||
| -rw-r--r-- | miasm2/ir/symbexec.py | 54 | ||||
| -rw-r--r-- | test/analysis/depgraph.py | 17 | ||||
| -rw-r--r-- | test/core/sembuilder.py | 8 | ||||
| -rwxr-xr-x | test/ir/analysis.py | 25 |
18 files changed, 431 insertions, 421 deletions
diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index d98b461a..cb9e0900 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -1,6 +1,8 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.expression.expression import get_rw from miasm2.arch.x86.ira import ir_a_x86_32 + + print """ Simple expression manipulation demo. Get read/written registers for a given instruction diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index 9451a407..88f9b680 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -16,19 +16,22 @@ def intra_bloc_flow_raw(ir_arch, flow_graph, irb): in_nodes = {} out_nodes = {} current_nodes = {} - for i, exprs in enumerate(irb.irs): - list_rw = get_list_rw(exprs) + for i, assignblk in enumerate(irb.irs): + dict_rw = assignblk.get_rw(cst_read=True) + if irb.label.offset == 0x13: + print irb.label + print i + print dict_rw current_nodes.update(out_nodes) # gen mem arg to mem node links all_mems = set() - for nodes_r, nodes_w in list_rw: - for n in nodes_r.union(nodes_w): + for node_w, nodes_r in dict_rw.iteritems(): + for n in nodes_r.union([node_w]): all_mems.update(get_expr_mem(n)) if not all_mems: continue - # print [str(x) for x in all_mems] for n in all_mems: node_n_w = get_node_name(irb.label, i, n) if not n in nodes_r: @@ -44,7 +47,7 @@ def intra_bloc_flow_raw(ir_arch, flow_graph, irb): flow_graph.add_uniq_edge(node_n_r, node_n_w) # gen data flow links - for nodes_r, nodes_w in list_rw: + for node_w, nodes_r in dict_rw.iteritems(): for n_r in nodes_r: if n_r in current_nodes: node_n_r = current_nodes[n_r] @@ -54,13 +57,12 @@ def intra_bloc_flow_raw(ir_arch, flow_graph, irb): in_nodes[n_r] = node_n_r flow_graph.add_node(node_n_r) - for n_w in nodes_w: - node_n_w = get_node_name(irb.label, i + 1, n_w) - out_nodes[n_w] = node_n_w - # current_nodes[n_w] = node_n_w - flow_graph.add_node(node_n_w) - flow_graph.add_uniq_edge(node_n_r, node_n_w) + node_n_w = get_node_name(irb.label, i + 1, node_w) + out_nodes[node_w] = node_n_w + + flow_graph.add_node(node_n_w) + flow_graph.add_uniq_edge(node_n_r, node_n_w) irb.in_nodes = in_nodes irb.out_nodes = out_nodes diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index a7c16a19..897de77b 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -832,11 +832,11 @@ class DependencyGraph(object): read = set() modifier = False - for affect in self._get_affblock(depnode): - if affect.dst == depnode.element: - elements = self._follow_apply_cb(affect.src) - read.update(elements) - modifier = True + assignblk = self._get_affblock(depnode) + if depnode.element in assignblk: + elements = self._follow_apply_cb(assignblk[depnode.element]) + read.update(elements) + modifier = True # If it's not a modifier affblock, reinject current element if not modifier: diff --git a/miasm2/arch/aarch64/ira.py b/miasm2/arch/aarch64/ira.py index cf44f42c..3f610360 100644 --- a/miasm2/arch/aarch64/ira.py +++ b/miasm2/arch/aarch64/ira.py @@ -2,7 +2,7 @@ #-*- coding:utf-8 -*- from miasm2.expression.expression import * -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import ir, irbloc, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.aarch64.sem import ir_aarch64l, ir_aarch64b from miasm2.arch.aarch64.regs import * @@ -35,23 +35,14 @@ class ir_a_aarch64l(ir_a_aarch64l_base): b.rw[-1][1].add(self.arch.regs.of) b.rw[-1][1].add(self.arch.regs.cf) - def call_effects(self, ad): - irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), - ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ]] - return irs - def post_add_bloc(self, bloc, ir_blocs): ir.post_add_bloc(self, bloc, ir_blocs) for irb in ir_blocs: pc_val = None lr_val = None - for exprs in irb.irs: - for e in exprs: - if e.dst == PC: - pc_val = e.src - if e.dst == LR: - lr_val = e.src + for assignblk in irb.irs: + pc_val = assignblk.get(PC, pc_val) + lr_val = assignblk.get(LR, lr_val) if pc_val is None or lr_val is None: continue if not isinstance(lr_val, ExprInt): @@ -60,10 +51,13 @@ class ir_a_aarch64l(ir_a_aarch64l_base): l = bloc.lines[-1] if lr_val.arg != l.offset + l.l: continue + + # CALL lbl = bloc.get_next() new_lbl = self.gen_label() irs = self.call_effects(pc_val) - irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) + irs.append(AssignBlock([ExprAff(self.IRDst, + ExprId(lbl, size=self.pc.size))])) nbloc = irbloc(new_lbl, irs) nbloc.lines = [l] * len(irs) self.blocs[new_lbl] = nbloc diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index 9c896095..b198bc43 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -1,5 +1,5 @@ from miasm2.expression import expression as m2_expr -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import ir, irbloc, AssignBlock from miasm2.arch.aarch64.arch import mn_aarch64, conds_expr, replace_regs from miasm2.arch.aarch64.regs import * from miasm2.core.sembuilder import SemBuilder @@ -778,19 +778,21 @@ class ir_aarch64l(ir): return m2_expr.ExprAff(dst, src) def irbloc_fix_regs_for_mode(self, irbloc, mode=64): - for irs in irbloc.irs: - for i, e in enumerate(irs): - """ - special case for 64 bits: - if destination is a 32 bit reg, zero extend the 64 bit reg - """ - if (isinstance(e.dst, m2_expr.ExprId) and - e.dst.size == 32 and - e.dst in replace_regs): - src = self.expr_fix_regs_for_mode(e.src) - dst = replace_regs[e.dst].arg - e = m2_expr.ExprAff(dst, src.zeroExtend(64)) - irs[i] = self.expr_fix_regs_for_mode(e) + for assignblk in irbloc.irs: + for dst, src in assignblk.items(): + del(assignblk[dst]) + # Special case for 64 bits: + # If destination is a 32 bit reg, zero extend the 64 bit reg + + if (isinstance(dst, m2_expr.ExprId) and + dst.size == 32 and + dst in replace_regs): + src = src.zeroExtend(64) + dst = replace_regs[dst].arg + + dst = self.expr_fix_regs_for_mode(dst) + src = self.expr_fix_regs_for_mode(src) + assignblk[dst] = src irbloc.dst = self.expr_fix_regs_for_mode(irbloc.dst) def mod_pc(self, instr, instr_ir, extra_ir): diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py index b918a2e6..b279fab5 100644 --- a/miasm2/arch/arm/ira.py +++ b/miasm2/arch/arm/ira.py @@ -2,7 +2,7 @@ #-*- coding:utf-8 -*- from miasm2.expression.expression import * -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import ir, irbloc, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.arm.sem import ir_arml, ir_armtl, ir_armb, ir_armtb from miasm2.arch.arm.regs import * @@ -33,26 +33,14 @@ class ir_a_arml(ir_a_arml_base): b.rw[-1][1].add(self.arch.regs.of) b.rw[-1][1].add(self.arch.regs.cf) - def call_effects(self, ad): - irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), - ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ]] - return irs - def post_add_bloc(self, bloc, ir_blocs): ir.post_add_bloc(self, bloc, ir_blocs) - # flow_graph = DiGraph() for irb in ir_blocs: - # print 'X'*40 - # print irb pc_val = None lr_val = None - for exprs in irb.irs: - for e in exprs: - if e.dst == PC: - pc_val = e.src - if e.dst == LR: - lr_val = e.src + for assignblk in irb.irs: + pc_val = assignblk.get(PC, pc_val) + lr_val = assignblk.get(LR, lr_val) if pc_val is None or lr_val is None: continue if not isinstance(lr_val, ExprInt): @@ -61,51 +49,18 @@ class ir_a_arml(ir_a_arml_base): l = bloc.lines[-1] if lr_val.arg != l.offset + l.l: continue - # print 'IS CALL!' + + # CALL lbl = bloc.get_next() new_lbl = self.gen_label() irs = self.call_effects(pc_val) - irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) + irs.append(AssignBlock([ExprAff(self.IRDst, + ExprId(lbl, size=self.pc.size))])) nbloc = irbloc(new_lbl, irs) - nbloc.lines = [l]*len(irs) + nbloc.lines = [l] * len(irs) self.blocs[new_lbl] = nbloc irb.dst = ExprId(new_lbl, size=self.pc.size) - """ - if not bloc.lines: - return - l = bloc.lines[-1] - sub_call_dst = None - if not l.is_subcall(): - return - sub_call_dst = l.args[0] - if self.ExprIsLabel(sub_call_dst): - sub_call_dst = sub_call_dst.name - for b in ir_blocs: - l = b.lines[-1] - sub_call_dst_b = None - sub_call_dst_b = l.args[0] - #if self.ExprIsLabel(sub_call_dst_b): - # sub_call_dst_b = sub_call_dst.name - #if str(b.dst) == str(sub_call_dst_b): - # pass - if not l.is_subcall(): - continue - if b.dst != sub_call_dst_b: - continue - sub_call_dst_b = l.args[0] - if self.ExprIsLabel(sub_call_dst_b): - sub_call_dst_b = sub_call_dst.name - lbl = bloc.get_next() - new_lbl = self.gen_label() - irs = self.call_effects(l.args[0]) - nbloc = irbloc(new_lbl, ExprId(lbl, size=self.pc.size), irs) - nbloc.lines = [l] - self.blocs[new_lbl] = nbloc - b.dst = ExprId(new_lbl, size=self.pc.size) - return - """ - def get_out_regs(self, b): return set([self.ret_reg, self.sp]) diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index f88172fb..de508e41 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -2,7 +2,7 @@ #-*- coding:utf-8 -*- from miasm2.expression.expression import * -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import ir, irbloc, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b from miasm2.arch.mips32.regs import * @@ -18,26 +18,15 @@ class ir_a_mips32l(ir_mips32l, ira): def set_dead_regs(self, b): pass - def call_effects(self, ad): - irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), - ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ]] - return irs - def post_add_bloc(self, bloc, ir_blocs): ir.post_add_bloc(self, bloc, ir_blocs) for irb in ir_blocs: - # print 'X'*40 - # print irb pc_val = None lr_val = None - for exprs in irb.irs: - for e in exprs: - if e.dst == PC: - pc_val = e.src - if e.dst == RA: - lr_val = e.src - #print "XXX", pc_val, lr_val + for assignblk in irb.irs: + pc_val = assignblk.get(PC, pc_val) + lr_val = assignblk.get(RA, lr_val) + if pc_val is None or lr_val is None: continue if not expr_is_int_or_label(lr_val): @@ -46,18 +35,17 @@ class ir_a_mips32l(ir_mips32l, ira): lr_val = ExprInt32(lr_val.name.offset) l = bloc.lines[-2] - #print 'TEST', l, hex(lr_val.arg), hex(l.offset + 8) - #print lr_val.arg, hex(l.offset + l.l) if lr_val.arg != l.offset + 8: raise ValueError("Wrong arg") - # print 'IS CALL!' + # CALL lbl = bloc.get_next() new_lbl = self.gen_label() irs = self.call_effects(pc_val) - irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) + irs.append(AssignBlock([ExprAff(self.IRDst, + ExprId(lbl, size=self.pc.size))])) nbloc = irbloc(new_lbl, irs) - nbloc.lines = [l] + nbloc.lines = [l] * len(irs) self.blocs[new_lbl] = nbloc irb.dst = ExprId(new_lbl, size=self.pc.size) diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py index 26a53a1e..f9da81a7 100644 --- a/miasm2/arch/msp430/ira.py +++ b/miasm2/arch/msp430/ira.py @@ -2,7 +2,7 @@ #-*- coding:utf-8 -*- from miasm2.expression.expression import * -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import ir, irbloc, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.msp430.sem import ir_msp430 from miasm2.arch.msp430.regs import * @@ -35,39 +35,27 @@ class ir_a_msp430(ir_a_msp430_base): b.rw[-1][1].add(self.arch.regs.cpuoff) b.rw[-1][1].add(self.arch.regs.gie) - def call_effects(self, ad): - irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), - ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ]] - return irs - def post_add_bloc(self, bloc, ir_blocs): ir.post_add_bloc(self, bloc, ir_blocs) - # flow_graph = DiGraph() - l = bloc.lines[-1] if not l.is_subcall(): return for irb in ir_blocs: - # print 'X'*40 - # print irb pc_val = None - for exprs in irb.irs: - for e in exprs: - if e.dst == PC: - pc_val = e.src + for assignblk in irb.irs: + pc_val = assignblk.get(PC, pc_val) if pc_val is None: continue l = bloc.lines[-1] - # print str(l), 'IS CALL!' lbl = bloc.get_next() new_lbl = self.gen_label() irs = self.call_effects(pc_val) - irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) + irs.append(AssignBlock([ExprAff(self.IRDst, + ExprId(lbl, size=self.pc.size))])) nbloc = irbloc(new_lbl, irs) - nbloc.lines = [l] + nbloc.lines = [l] * len(irs) self.blocs[new_lbl] = nbloc irb.dst = ExprId(new_lbl, size=self.pc.size) diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py index b7a1f19f..d496f380 100644 --- a/miasm2/arch/x86/ira.py +++ b/miasm2/arch/x86/ira.py @@ -4,7 +4,7 @@ from miasm2.expression.expression import ExprAff, ExprOp, ExprId from miasm2.core.graph import DiGraph from miasm2.core.asmbloc import expr_is_label -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import ir, irbloc, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 @@ -32,12 +32,6 @@ class ir_a_x86_16(ir_x86_16, ira): for b in leaves: self.set_dead_regs(b) - def call_effects(self, ad): - irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), - ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ]] - return irs - def post_add_bloc(self, bloc, ir_blocs): ir.post_add_bloc(self, bloc, ir_blocs) if not bloc.lines: @@ -49,8 +43,8 @@ class ir_a_x86_16(ir_x86_16, ira): sub_call_dst = l.args[0] if expr_is_label(sub_call_dst): sub_call_dst = sub_call_dst.name - for b in ir_blocs: - l = b.lines[-1] + for irb in ir_blocs: + l = irb.lines[-1] sub_call_dst = None if not l.is_subcall(): continue @@ -60,12 +54,13 @@ class ir_a_x86_16(ir_x86_16, ira): lbl = bloc.get_next() new_lbl = self.gen_label() irs = self.call_effects(l.args[0]) - irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) + irs.append(AssignBlock([ExprAff(self.IRDst, + ExprId(lbl, size=self.pc.size))])) nbloc = irbloc(new_lbl, irs) - nbloc.lines = [l] + nbloc.lines = [l] * len(irs) self.blocs[new_lbl] = nbloc - b.dst = ExprId(new_lbl, size=self.pc.size) + irb.dst = ExprId(new_lbl, size=self.pc.size) return @@ -98,15 +93,16 @@ class ir_a_x86_64(ir_x86_64, ir_a_x86_16): self.ret_reg = self.arch.regs.RAX def call_effects(self, ad): - irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp, - self.arch.regs.RCX, - self.arch.regs.RDX, - self.arch.regs.R8, - self.arch.regs.R9, - )), - ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ]] - return irs + return [AssignBlock([ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, + self.sp, + self.arch.regs.RCX, + self.arch.regs.RDX, + self.arch.regs.R8, + self.arch.regs.R9, + )), + ExprAff(self.sp, ExprOp('call_func_stack', + ad, self.sp)), + ])] def sizeof_char(self): return 8 diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 18410bf9..c14d6f97 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3620,11 +3620,11 @@ def ps_rl_ll(ir, instr, a, b, op, size): m2_expr.ExprAff(ir.IRDst, lbl_next)] e_do = [] + slices = [] for i in xrange(0, a.size, size): - e.append(m2_expr.ExprAff(a[i:i + size], m2_expr.ExprOp(op, - a[i:i + size], - count[:size]))) - + slices.append((m2_expr.ExprOp(op,a[i:i + size], count[:size]), + i, i + size)) + e.append(m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(slices))) e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) return e, [irbloc(lbl_do.name, [e_do]), irbloc(lbl_zero.name, [e_zero])] @@ -3745,12 +3745,13 @@ def pcmpeqd(ir, instr, a, b): def punpck(ir, instr, a, b, size, off): e = [] + slices = [] for i in xrange(a.size / (2 * size)): src1 = a[size * i + off: size * i + off + size] src2 = b[size * i + off: size * i + off + size] - e.append(m2_expr.ExprAff(a[size * 2 * i: size * 2 * i + size], src1)) - e.append( - m2_expr.ExprAff(a[size * (2 * i + 1): size * (2 * i + 1) + size], src2)) + slices.append((src1, size * 2 * i, size * 2 * i + size)) + slices.append((src2, size * (2 * i + 1), size * (2 * i + 1) + size)) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(slices))) return e, [] @@ -4553,20 +4554,20 @@ class ir_x86_16(ir): return m2_expr.ExprAff(dst, src) def irbloc_fix_regs_for_mode(self, irbloc, mode=64): - for irs in irbloc.irs: - for i, e in enumerate(irs): - """ - special case for 64 bits: - if destination is a 32 bit reg, zero extend the 64 bit reg - """ + for assignblk in irbloc.irs: + for dst, src in assignblk.items(): + del(assignblk[dst]) + # Special case for 64 bits: + # If destination is a 32 bit reg, zero extend the 64 bit reg if mode == 64: - if (isinstance(e.dst, m2_expr.ExprId) and - e.dst.size == 32 and - e.dst in replace_regs[64]): - src = self.expr_fix_regs_for_mode(e.src, mode) - dst = replace_regs[64][e.dst].arg - e = m2_expr.ExprAff(dst, src.zeroExtend(64)) - irs[i] = self.expr_fix_regs_for_mode(e, mode) + if (isinstance(dst, m2_expr.ExprId) and + dst.size == 32 and + dst in replace_regs[64]): + src = src.zeroExtend(64) + dst = replace_regs[64][dst].arg + dst = self.expr_fix_regs_for_mode(dst, mode) + src = self.expr_fix_regs_for_mode(src, mode) + assignblk[dst] = src irbloc.dst = self.expr_fix_regs_for_mode(irbloc.dst, mode) diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index bda0dbc4..9ee3f360 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -1368,9 +1368,6 @@ def get_list_rw(exprs, mem_read=False, cst_read=True): # each cst is indexed o_r_rw = set() for r in o_r: - # if isinstance(r, ExprInt): - # r = ExprOp('cst_%d'%cst_num, r) - # cst_num += 1 o_r_rw.add(r) o_r = o_r_rw list_rw.append((o_r, o_w)) diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index 40a3bf64..9cb15811 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -4,9 +4,9 @@ import logging from miasm2.ir.symbexec import symbexec -from miasm2.ir.ir import ir +from miasm2.ir.ir import ir, AssignBlock from miasm2.expression.expression \ - import ExprAff, ExprCond, ExprId, ExprInt, ExprMem + import ExprAff, ExprCond, ExprId, ExprInt, ExprMem, ExprOp log = logging.getLogger("analysis") console_handler = logging.StreamHandler() @@ -29,6 +29,17 @@ class ira(ir): """Returns ids of all registers used in the IR""" return self.arch.regs.all_regs_ids + [self.IRDst] + def call_effects(self, ad): + """ + Default simulation of a function call to @ad + @ad: (Expr) address of the called function + """ + return [AssignBlock( + [ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), + ExprAff(self.sp, ExprOp( + 'call_func_stack', ad, self.sp)), + ])] + def remove_dead_instr(self, irb, useful): """Remove dead affectations using previous reaches analysis @irb: irbloc instance @@ -37,16 +48,12 @@ class ira(ir): PRE: compute_reach(self) """ modified = False - for k, ir in enumerate(irb.irs): - j = 0 - while j < len(ir): - cur_instr = ir[j] - if (isinstance(cur_instr.dst, ExprId) - and (irb.label, k, cur_instr) not in useful): - del ir[j] + for idx, assignblk in enumerate(irb.irs): + for dst in assignblk.keys(): + if (isinstance(dst, ExprId) and + (irb.label, idx, dst) not in useful): + del assignblk[dst] modified = True - else: - j += 1 return modified def init_useful_instr(self): @@ -74,17 +81,17 @@ class ira(ir): # reaching this block for r in self.ira_regs_ids(): useful.update(block.cur_reach[-1][r].union( - block.defout[-1][r])) + block.defout[-1][r])) # Function call, memory write or IRDst affectation - for k, ir in enumerate(block.irs): - for i_cur in ir: - if i_cur.src.is_function_call(): + for idx, assignblk in enumerate(block.irs): + for dst, src in assignblk.iteritems(): + if src.is_function_call(): # /!\ never remove ir calls - useful.add((block.label, k, i_cur)) - if isinstance(i_cur.dst, ExprMem): - useful.add((block.label, k, i_cur)) - useful.update(block.defout[k][self.IRDst]) + useful.add((block.label, idx, dst)) + if isinstance(dst, ExprMem): + useful.add((block.label, idx, dst)) + useful.update(block.defout[idx][self.IRDst]) # Affecting return registers if not has_son: @@ -112,12 +119,13 @@ class ira(ir): while worklist: elem = worklist.pop() useful.add(elem) - irb, irs_ind, ins = elem + irb, irs_ind, dst = elem - block = self.blocs[irb] - instr_defout = block.defout[irs_ind] - cur_kill = block.cur_kill[irs_ind] - cur_reach = block.cur_reach[irs_ind] + irb = self.blocs[irb] + ins = irb.irs[irs_ind].dst2ExprAff(dst) + instr_defout = irb.defout[irs_ind] + cur_kill = irb.cur_kill[irs_ind] + cur_reach = irb.cur_reach[irs_ind] # Handle dependencies of used variables in ins for reg in ins.get_r(True).intersection(self.ira_regs_ids()): @@ -126,9 +134,9 @@ class ira(ir): cur_kill[reg] if not instr_defout[reg] else set())) - for _, _, i in instr_defout[reg]: - # Loop case (i in defout of current block) - if i == ins: + for _, _, defout_dst in instr_defout[reg]: + # Loop case (dst in defout of current irb) + if defout_dst == dst: worklist.update(cur_reach[reg].difference(useful)) return useful diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index fa34cd01..ffcf5480 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -18,7 +18,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # - +from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval @@ -28,6 +28,135 @@ from miasm2.core.asmbloc import asm_symbol_pool, expr_is_label, asm_label, \ from miasm2.core.graph import DiGraph +class AssignBlock(dict): + + def __init__(self, irs=None): + """@irs seq""" + if irs is None: + irs = [] + super(AssignBlock, self).__init__() + + for expraff in irs: + # Concurrent assignments are handled in __setitem__ + self[expraff.dst] = expraff.src + + def __setitem__(self, dst, src): + """ + Special cases: + * if dst is an ExprSlice, expand it to affect the full Expression + * if dst already known, sources are merged + """ + + if dst.size != src.size: + raise RuntimeError( + "sanitycheck: args must have same size! %s" % + ([(str(arg), arg.size) for arg in [dst, src]])) + + if isinstance(dst, m2_expr.ExprSlice): + # Complete the source with missing slice parts + new_dst = dst.arg + rest = [(m2_expr.ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) + for r in dst.slice_rest()] + all_a = [(src, dst.start, dst.stop)] + rest + all_a.sort(key=lambda x: x[1]) + new_src = m2_expr.ExprCompose(all_a) + else: + new_dst, new_src = dst, src + + if new_dst in self and isinstance(new_src, m2_expr.ExprCompose): + if not isinstance(self[new_dst], m2_expr.ExprCompose): + # prev_RAX = 0x1122334455667788 + # input_RAX[0:8] = 0x89 + # final_RAX -> ? (assignment are in parallel) + raise RuntimeError("Concurent access on same bit not allowed") + + # Consider slice grouping + expr_list = [(new_dst, new_src), + (new_dst, self[new_dst])] + # Find collision + e_colision = reduce(lambda x, y: x.union(y), + (self.get_modified_slice(dst, src) + for (dst, src) in expr_list), + set()) + + # Sort interval collision + known_intervals = sorted([(x[1], x[2]) for x in e_colision]) + + for i, (_, stop) in enumerate(known_intervals[:-1]): + if stop > known_intervals[i + 1][0]: + raise RuntimeError( + "Concurent access on same bit not allowed") + + # Fill with missing data + missing_i = get_missing_interval(known_intervals, 0, new_dst.size) + remaining = ((m2_expr.ExprSlice(new_dst, *interval), + interval[0], + interval[1]) + for interval in missing_i) + + # Build the merging expression + new_src = m2_expr.ExprCompose(e_colision.union(remaining)) + + super(AssignBlock, self).__setitem__(new_dst, new_src) + + @staticmethod + def get_modified_slice(dst, src): + """Return an Expr list of extra expressions needed during the + object instanciation""" + + if not isinstance(src, m2_expr.ExprCompose): + raise ValueError("Get mod slice not on expraff slice", str(self)) + modified_s = [] + for arg in src.args: + if (not isinstance(arg[0], m2_expr.ExprSlice) or + arg[0].arg != dst or + arg[1] != arg[0].start or + arg[2] != arg[0].stop): + # If x is not the initial expression + modified_s.append(arg) + return modified_s + + def get_w(self): + """Return a set of elements written""" + return set(self.keys()) + + def get_rw(self, mem_read=False, cst_read=False): + """Return a dictionnary associating written expressions to a set of + their read requirements + @mem_read: (optional) mem_read argument of `get_r` + @cst_read: (optional) cst_read argument of `get_r` + """ + out = {} + for dst, src in self.iteritems(): + src_read = src.get_r(mem_read=mem_read, cst_read=cst_read) + if isinstance(dst, m2_expr.ExprMem): + # Read on destination happens only with ExprMem + src_read.update(dst.arg.get_r(mem_read=mem_read, + cst_read=cst_read)) + out[dst] = src_read + return out + + def get_r(self, mem_read=False, cst_read=False): + """Return a set of elements reads + @mem_read: (optional) mem_read argument of `get_r` + @cst_read: (optional) cst_read argument of `get_r` + """ + return set( + chain.from_iterable(self.get_rw(mem_read=mem_read, + cst_read=cst_read).itervalues())) + + def __str__(self): + out = [] + for dst, src in sorted(self.iteritems()): + out.append("%s = %s" % (dst, src)) + return "\n".join(out) + + def dst2ExprAff(self, dst): + """Return an ExprAff corresponding to @dst equation + @dst: Expr instance""" + return m2_expr.ExprAff(dst, self[dst]) + + class irbloc(object): def __init__(self, label, irs, lines=None): @@ -45,26 +174,29 @@ class irbloc(object): """Find the IRDst affectation and update dst, dst_linenb accordingly""" if self._dst is not None: return self._dst - dst = None - for linenb, ir in enumerate(self.irs): - for i in ir: - if isinstance(i.dst, m2_expr.ExprId) and i.dst.name == "IRDst": - if dst is not None: + final_dst = None + for linenb, assignblk in enumerate(self.irs): + for dst, src in assignblk.iteritems(): + if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst": + if final_dst is not None: raise ValueError('Multiple destinations!') - dst = i.src - self._dst = dst + final_dst = src + self._dst = final_dst self._dst_linenb = linenb - return dst + return final_dst def _set_dst(self, value): """Find and replace the IRDst affectation's source by @value""" if self._dst_linenb is None: self._get_dst() - ir = self.irs[self._dst_linenb] - for i, expr in enumerate(ir): - if isinstance(expr.dst, m2_expr.ExprId) and expr.dst.name == "IRDst": - ir[i] = m2_expr.ExprAff(expr.dst, value) + assignblk = self.irs[self._dst_linenb] + for dst in assignblk: + if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst": + del(assignblk[dst]) + assignblk[dst] = value + # Sanity check is already done in _get_dst + break self._dst = value dst = property(_get_dst, _set_dst) @@ -90,34 +222,32 @@ class irbloc(object): for _ in xrange(len(self.irs))] self.prev_kill = [{reg: set() for reg in regs_ids} for _ in xrange(len(self.irs))] + # LineNumber -> dict: + # Register: set(definition(irb label, index)) self.defout = [{reg: set() for reg in regs_ids} for _ in xrange(len(self.irs))] - - for k, ir in enumerate(self.irs): - r, w = set(), set() - for i in ir: - r.update(x for x in i.get_r(True) - if isinstance(x, m2_expr.ExprId)) - w.update(x for x in i.get_w() - if isinstance(x, m2_expr.ExprId)) - if isinstance(i.dst, m2_expr.ExprMem): - r.update(x for x in i.dst.arg.get_r(True) - if isinstance(x, m2_expr.ExprId)) - self.defout[k].update((x, {(self.label, k, i)}) - for x in i.get_w() - if isinstance(x, m2_expr.ExprId)) - self.r.append(r) - self.w.append(w) + keep_exprid = lambda elts: filter(lambda expr: isinstance(expr, + m2_expr.ExprId), + elts) + for idx, assignblk in enumerate(self.irs): + read, write = map(keep_exprid, + (assignblk.get_r(mem_read=True), + assignblk.get_w())) + + self.defout[idx].update({dst: set([(self.label, idx, dst)]) + for dst in assignblk + if isinstance(dst, m2_expr.ExprId)}) + self.r.append(read) + self.w.append(write) def __str__(self): - o = [] - o.append('%s' % self.label) - for expr in self.irs: - for e in expr: - o.append('\t%s' % e) - o.append("") - - return "\n".join(o) + out = [] + out.append('%s' % self.label) + for assignblk in self.irs: + for dst, src in assignblk.iteritems(): + out.append('\t%s = %s' % (dst, src)) + out.append("") + return "\n".join(out) class DiGraphIR(DiGraph): @@ -139,13 +269,14 @@ class DiGraphIR(DiGraph): if node not in self._blocks: yield [self.DotCellDescription(text="NOT PRESENT", attr={})] raise StopIteration - for i, exprs in enumerate(self._blocks[node].irs): - for expr in exprs: + for i, assignblk in enumerate(self._blocks[node].irs): + for dst, src in assignblk.iteritems(): + line = "%s = %s" % (dst, src) if self._dot_offset: yield [self.DotCellDescription(text="%-4d" % i, attr={}), - self.DotCellDescription(text=str(expr), attr={})] + self.DotCellDescription(text=line, attr={})] else: - yield self.DotCellDescription(text=str(expr), attr={}) + yield self.DotCellDescription(text=line, attr={}) yield self.DotCellDescription(text="", attr={}) def edge_attr(self, src, dst): @@ -190,9 +321,15 @@ class ir(object): # Lazy structure self._graph = None + def get_ir(self, instr): + raise NotImplementedError("Abstract Method") + def instr2ir(self, l): - ir_bloc_cur, ir_blocs_extra = self.get_ir(l) - return ir_bloc_cur, ir_blocs_extra + ir_bloc_cur, extra_assignblk = self.get_ir(l) + assignblk = AssignBlock(ir_bloc_cur) + for irb in extra_assignblk: + irb.irs = map(AssignBlock, irb.irs) + return assignblk, extra_assignblk def get_label(self, ad): """Transforms an ExprId/ExprInt/label/int into a label @@ -221,62 +358,6 @@ class ir(object): b.lines = [l] self.add_bloc(b, gen_pc_updt) - def merge_multi_affect(self, affect_list): - """ - If multiple affection to a same ExprId are present in @affect_list, - merge them (in place). - For instance, XCGH AH, AL semantic is - [ - RAX = {RAX[0:8],0,8, RAX[0:8],8,16, RAX[16:64],16,64} - RAX = {RAX[8:16],0,8, RAX[8:64],8,64} - ] - This function will update @affect_list to replace previous ExprAff by - [ - RAX = {RAX[8:16],0,8, RAX[0:8],8,16, RAX[16:64],16,64} - ] - """ - - # Extract side effect - effect = {} - for expr in affect_list: - effect[expr.dst] = effect.get(expr.dst, []) + [expr] - - # Find candidates - for dst, expr_list in effect.items(): - if len(expr_list) <= 1: - continue - - # Only treat ExprCompose list - if any(map(lambda e: not(isinstance(e.src, m2_expr.ExprCompose)), - expr_list)): - continue - - # Find collision - e_colision = reduce(lambda x, y: x.union(y), - (e.get_modified_slice() for e in expr_list), - set()) - # Sort interval collision - known_intervals = sorted([(x[1], x[2]) for x in e_colision]) - - # Fill with missing data - missing_i = get_missing_interval(known_intervals, 0, dst.size) - - remaining = ((m2_expr.ExprSlice(dst, *interval), - interval[0], - interval[1]) - for interval in missing_i) - - # Build the merging expression - slices = sorted(e_colision.union(remaining), key=lambda x: x[1]) - final_dst = m2_expr.ExprCompose(slices) - - # Remove unused expression - for expr in expr_list: - affect_list.remove(expr) - - # Add the merged one - affect_list.append(m2_expr.ExprAff(dst, final_dst)) - def getby_offset(self, offset): out = set() for irb in self.blocs.values(): @@ -286,8 +367,9 @@ class ir(object): return out def gen_pc_update(self, c, l): - c.irs.append([m2_expr.ExprAff(self.pc, m2_expr.ExprInt_from(self.pc, - l.offset))]) + c.irs.append(AssignBlock([m2_expr.ExprAff(self.pc, + m2_expr.ExprInt_from(self.pc, + l.offset))])) c.lines.append(l) def add_bloc(self, bloc, gen_pc_updt=False): @@ -298,12 +380,12 @@ class ir(object): label = self.get_instr_label(l) c = irbloc(label, [], []) ir_blocs_all.append(c) - ir_bloc_cur, ir_blocs_extra = self.instr2ir(l) + assignblk, ir_blocs_extra = self.instr2ir(l) if gen_pc_updt is not False: self.gen_pc_update(c, l) - c.irs.append(ir_bloc_cur) + c.irs.append(assignblk) c.lines.append(l) if ir_blocs_extra: @@ -337,23 +419,15 @@ class ir(object): continue dst = m2_expr.ExprId(self.get_next_label(bloc.lines[-1]), self.pc.size) - b.irs.append([m2_expr.ExprAff(self.IRDst, dst)]) + b.irs.append(AssignBlock([m2_expr.ExprAff(self.IRDst, dst)])) b.lines.append(b.lines[-1]) - def gen_edges(self, bloc, ir_blocs): - pass - def post_add_bloc(self, bloc, ir_blocs): self.set_empty_dst_to_next(bloc, ir_blocs) - self.gen_edges(bloc, ir_blocs) for irb in ir_blocs: self.irbloc_fix_regs_for_mode(irb, self.attrib) - # Detect multi-affectation - for affect_list in irb.irs: - self.merge_multi_affect(affect_list) - self.blocs[irb.label] = irb # Forget graph if any @@ -375,15 +449,17 @@ class ir(object): return l def simplify_blocs(self): - for b in self.blocs.values(): - for ir in b.irs: - for i, r in enumerate(ir): - ir[i] = m2_expr.ExprAff(expr_simp(r.dst), expr_simp(r.src)) + for irb in self.blocs.values(): + for assignblk in irb.irs: + for dst, src in assignblk.items(): + del assignblk[dst] + assignblk[expr_simp(dst)] = expr_simp(src) def replace_expr_in_ir(self, bloc, rep): - for irs in bloc.irs: - for i, l in enumerate(irs): - irs[i] = l.replace_expr(rep) + for assignblk in bloc.irs: + for dst, src in assignblk.items(): + del assignblk[dst] + assignblk[dst.replace_expr(rep)] = src.replace_expr(rep) def get_rw(self, regs_ids=None): """ @@ -395,7 +471,11 @@ class ir(object): for b in self.blocs.values(): b.get_rw(regs_ids) - def sort_dst(self, todo, done): + def _extract_dst(self, todo, done): + """ + Naive extraction of @todo destinations + WARNING: @todo and @done are modified + """ out = set() while todo: dst = todo.pop() @@ -412,30 +492,26 @@ class ir(object): done.add(dst) return out - def dst_trackback(self, b): - dst = b.dst - todo = set([dst]) + def dst_trackback(self, irb): + """ + Naive backtracking of IRDst + @irb: irbloc instance + """ + todo = set([irb.dst]) done = set() - for irs in reversed(b.irs): - if len(todo) == 0: + for assignblk in reversed(irb.irs): + if not todo: break - out = self.sort_dst(todo, done) + out = self._extract_dst(todo, done) found = set() follow = set() - for i in irs: - if not out: - break - for o in out: - if i.dst == o: - follow.add(i.src) - found.add(o) - for o in found: - out.remove(o) - - for o in out: - if o not in found: - follow.add(o) + for dst in out: + if dst in assignblk: + follow.add(assignblk[dst]) + found.add(dst) + + follow.update(out.difference(found)) todo = follow return done diff --git a/miasm2/ir/ir2C.py b/miasm2/ir/ir2C.py index 31af2980..d888f586 100644 --- a/miasm2/ir/ir2C.py +++ b/miasm2/ir/ir2C.py @@ -200,7 +200,7 @@ def gen_irdst(ir_arch, e): out.append('%s;'%(gen_resolve_dst_simple(ir_arch, e))) return out -def Expr2C(ir_arch, l, exprs, gen_exception_code=False): +def Expr2C(ir_arch, l, assignblk, gen_exception_code=False): id_to_update = [] out = ["// %s" % (l)] out_pc = [] @@ -211,31 +211,24 @@ def Expr2C(ir_arch, l, exprs, gen_exception_code=False): prefect_index = {8: 0, 16: 0, 32: 0, 64: 0} new_expr = [] - e = set_pc(ir_arch, l.offset & mask_int) - #out.append("%s;" % patch_c_id(ir_arch.arch, e))) - pc_is_dst = False fetch_mem = False set_exception_flags = False - for e in exprs: - assert isinstance(e, m2_expr.ExprAff) - assert not isinstance(e.dst, m2_expr.ExprOp) - if isinstance(e.dst, m2_expr.ExprId): - if not e.dst in dst_dict: - dst_dict[e.dst] = [] - dst_dict[e.dst].append(e) - else: - new_expr.append(e) + for dst, src in assignblk.iteritems(): + assert not isinstance(dst, m2_expr.ExprOp) + if dst in dst_dict: + raise RuntimeError("warning: detected multi dst to same id") + new_expr.append((dst, src)) # test exception flags - ops = m2_expr.get_expr_ops(e) + ops = m2_expr.get_expr_ops(src) if set(['umod', 'udiv']).intersection(ops): set_exception_flags = True - if e.dst == exception_flags: + if dst == exception_flags: set_exception_flags = True # TODO XXX test function whose set exception_flags # search mem lookup for generate mem read prefetch - rs = e.src.get_r(mem_read=True) + rs = src.get_r(mem_read=True) for r in rs: if (not isinstance(r, m2_expr.ExprMem)) or r in src_mem: continue @@ -245,14 +238,6 @@ def Expr2C(ir_arch, l, exprs, gen_exception_code=False): pfmem = prefetch_id_size[r.size][index] src_mem[r] = pfmem - for dst, exs in dst_dict.items(): - if len(exs) == 1: - new_expr += exs - continue - exs = [expr_simp(x) for x in exs] - log_to_c_h.debug('warning: detected multi dst to same id') - log_to_c_h.debug('\t'.join([str(x) for x in exs])) - new_expr += exs out_mem = [] # first, generate mem prefetch @@ -265,9 +250,7 @@ def Expr2C(ir_arch, l, exprs, gen_exception_code=False): src_w_len = {} for k, v in src_mem.items(): src_w_len[k] = v - for e in new_expr: - - src, dst = e.src, e.dst + for dst, src in new_expr: # reload src using prefetch src = src.replace_expr(src_w_len) if dst is ir_arch.IRDst: @@ -294,7 +277,7 @@ def Expr2C(ir_arch, l, exprs, gen_exception_code=False): str_dst = str_dst.replace('MEM_LOOKUP', 'MEM_WRITE') out_mem.append('%s, %s);' % (str_dst[:-1], str_src)) - if e.dst == ir_arch.arch.pc[ir_arch.attrib]: + if dst == ir_arch.arch.pc[ir_arch.attrib]: pc_is_dst = True out_pc += ["return JIT_RET_NO_EXCEPTION;"] @@ -382,7 +365,7 @@ def ir2C(ir_arch, irbloc, lbl_done, out.append(["%s:" % irbloc.label.name]) #out.append(['printf("%s:\n");' % irbloc.label.name]) assert len(irbloc.irs) == len(irbloc.lines) - for l, exprs in zip(irbloc.lines, irbloc.irs): + for l, assignblk in zip(irbloc.lines, irbloc.irs): if l.offset not in lbl_done: e = set_pc(ir_arch, l.offset & mask_int) s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e)) @@ -398,7 +381,7 @@ def ir2C(ir_arch, irbloc, lbl_done, # print l # gen pc update post_instr = "" - c_code, post_instr, _ = Expr2C(ir_arch, l, exprs, gen_exception_code) + c_code, post_instr, _ = Expr2C(ir_arch, l, assignblk, gen_exception_code) out.append(c_code + post_instr) out.append([goto_local_code ] ) return out diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index ba19ccf7..1dc8dde1 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -2,6 +2,7 @@ import miasm2.expression.expression as m2_expr from miasm2.expression.modint import int32 from miasm2.expression.simplifications import expr_simp from miasm2.core import asmbloc +from miasm2.ir.ir import AssignBlock import logging @@ -365,36 +366,38 @@ class symbexec(object): ov.append((i, self.symbols.symbols_mem[x][0])) return ov - def eval_ir_expr(self, exprs): + def eval_ir_expr(self, assignblk): + """ + Evaluate AssignBlock on the current state + @assignblk: AssignBlock instance + """ pool_out = {} eval_cache = dict(self.symbols.items()) - for e in exprs: - if not isinstance(e, m2_expr.ExprAff): - raise TypeError('not affect', str(e)) - - src = self.eval_expr(e.src, eval_cache) - if isinstance(e.dst, m2_expr.ExprMem): - a = self.eval_expr(e.dst.arg, eval_cache) + for dst, src in assignblk.iteritems(): + src = self.eval_expr(src, eval_cache) + if isinstance(dst, m2_expr.ExprMem): + a = self.eval_expr(dst.arg, eval_cache) a = self.expr_simp(a) - # search already present mem - tmp = None # test if mem lookup is known - tmp = m2_expr.ExprMem(a, e.dst.size) - dst = tmp - pool_out[dst] = src + tmp = m2_expr.ExprMem(a, dst.size) + pool_out[tmp] = src - elif isinstance(e.dst, m2_expr.ExprId): - pool_out[e.dst] = src + elif isinstance(dst, m2_expr.ExprId): + pool_out[dst] = src else: - raise ValueError("affected zarb", str(e.dst)) + raise ValueError("affected zarb", str(dst)) - return pool_out.items() + return pool_out.iteritems() - def eval_ir(self, ir): + def eval_ir(self, assignblk): + """ + Apply an AssignBlock on the current state + @assignblk: AssignBlock instance + """ mem_dst = [] - src_dst = self.eval_ir_expr(ir) + src_dst = self.eval_ir_expr(assignblk) eval_cache = dict(self.symbols.items()) for dst, src in src_dst: if isinstance(dst, m2_expr.ExprMem): @@ -414,9 +417,14 @@ class symbexec(object): mem_dst.append(dst) return mem_dst - def emulbloc(self, bloc_ir, step=False): - for ir in bloc_ir.irs: - self.eval_ir(ir) + def emulbloc(self, irb, step=False): + """ + Symbolic execution of the @irb on the current state + @irb: irbloc instance + @step: display intermediate steps + """ + for assignblk in irb.irs: + self.eval_ir(assignblk) if step: print '_' * 80 self.dump_id() @@ -461,6 +469,6 @@ class symbexec(object): # Update value if needed if isinstance(expr, m2_expr.ExprAff): - self.eval_ir([m2_expr.ExprAff(expr.dst, ret)]) + self.eval_ir(AssignBlock([m2_expr.ExprAff(expr.dst, ret)])) return ret diff --git a/test/analysis/depgraph.py b/test/analysis/depgraph.py index 211671a4..fafae1fb 100644 --- a/test/analysis/depgraph.py +++ b/test/analysis/depgraph.py @@ -2,7 +2,7 @@ from miasm2.expression.expression import ExprId, ExprInt32, ExprAff, ExprCond from miasm2.core.asmbloc import asm_label from miasm2.ir.analysis import ira -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import ir, irbloc, AssignBlock from miasm2.core.graph import DiGraph from miasm2.analysis.depgraph import DependencyNode, DependencyGraph,\ DependencyDict @@ -48,13 +48,20 @@ LBL4 = asm_label("lbl4") LBL5 = asm_label("lbl5") LBL6 = asm_label("lbl6") - -def gen_irbloc(lbl, exprs): +def gen_irbloc(label, exprs_list): """ Returns an IRBlock with empty lines. Used only for tests purpose """ - lines = [None for _ in xrange(len(exprs))] - return irbloc(lbl, exprs, lines) + lines = [None for _ in xrange(len(exprs_list))] + irs = [] + for exprs in exprs_list: + if isinstance(exprs, AssignBlock): + irs.append(exprs) + else: + irs.append(AssignBlock(exprs)) + + irbl = irbloc(label, irs, lines) + return irbl class Regs(object): diff --git a/test/core/sembuilder.py b/test/core/sembuilder.py index 15fa67a4..3a575727 100644 --- a/test/core/sembuilder.py +++ b/test/core/sembuilder.py @@ -57,5 +57,9 @@ for statement in res[0]: print statement print "[+] Blocks:" -for block in res[1]: - print block +for irb in res[1]: + print irb.label + for exprs in irb.irs: + for expr in exprs: + print expr + print diff --git a/test/ir/analysis.py b/test/ir/analysis.py index 49daa70e..913d9c56 100755 --- a/test/ir/analysis.py +++ b/test/ir/analysis.py @@ -2,7 +2,7 @@ from miasm2.expression.expression import ExprId, ExprInt32, ExprAff, ExprMem from miasm2.core.asmbloc import asm_label from miasm2.ir.analysis import ira -from miasm2.ir.ir import irbloc +from miasm2.ir.ir import irbloc, AssignBlock a = ExprId("a") b = ExprId("b") @@ -33,9 +33,16 @@ LBL6 = asm_label("lbl6") -def gen_irbloc(label, exprs): - lines = [None for _ in xrange(len(exprs))] - irbl = irbloc(label, exprs, lines) +def gen_irbloc(label, exprs_list): + lines = [None for _ in xrange(len(exprs_list))] + irs = [] + for exprs in exprs_list: + if isinstance(exprs, AssignBlock): + irs.append(exprs) + else: + irs.append(AssignBlock(exprs)) + + irbl = irbloc(label, irs, lines) return irbl @@ -671,12 +678,4 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), # Check that each expr in the blocs are the same for lbl, irb in g_ira.blocs.iteritems(): exp_irb = g_exp_ira.blocs[lbl] - assert len(irb.irs) == len(exp_irb.irs), "(%s) %d / %d" %( - lbl, len(irb.irs), len(exp_irb.irs)) - for i in xrange(0, len(exp_irb.irs)): - assert len(irb.irs[i]) == len(exp_irb.irs[i]), "(%s:%d) %d / %d" %( - lbl, i, len(irb.irs[i]), len(exp_irb.irs[i])) - for s_instr in xrange(len(irb.irs[i])): - assert irb.irs[i][s_instr] == exp_irb.irs[i][s_instr],\ - "(%s:%d) %s / %s" %( - lbl, i, irb.irs[i][s_instr], exp_irb.irs[i][s_instr]) + assert exp_irb.irs == irb.irs |