diff options
| -rw-r--r-- | example/disasm/full.py | 11 | ||||
| -rw-r--r-- | miasm2/expression/modint.py | 16 | ||||
| -rw-r--r-- | miasm2/jitter/codegen.py | 16 | ||||
| -rw-r--r-- | miasm2/jitter/llvmconvert.py | 46 | ||||
| -rw-r--r-- | test/expression/modint.py | 15 | ||||
| -rw-r--r-- | test/jitter/jit_options.py | 31 |
6 files changed, 92 insertions, 43 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py index 7ff60d3b..f15b59eb 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -50,6 +50,8 @@ parser.add_argument('-a', "--try-disasm-all", action="store_true", help="Try to disassemble the whole binary") parser.add_argument('-i', "--image", action="store_true", help="Display image representation of disasm") +parser.add_argument('-c', "--rawbinary", default=False, action="store_true", + help="Don't interpret input as ELF/PE/...") args = parser.parse_args() @@ -57,8 +59,13 @@ if args.verbose: log_asmbloc.setLevel(logging.DEBUG) log.info('Load binary') -with open(args.filename) as fdesc: - cont = Container.from_stream(fdesc, addr=args.shiftoffset) +if args.rawbinary: + shift = args.shiftoffset if args.shiftoffset is not None else 0 + cont = Container.fallback_container(open(args.filename).read(), + None, addr=shift) +else: + with open(args.filename) as fdesc: + cont = Container.from_stream(fdesc, addr=args.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream diff --git a/miasm2/expression/modint.py b/miasm2/expression/modint.py index 90dabfac..b6a0e4ee 100644 --- a/miasm2/expression/modint.py +++ b/miasm2/expression/modint.py @@ -41,11 +41,15 @@ class moduint(object): return self.__class__(self.arg & y) def __div__(self, y): + # Python: 8 / -7 == -2 (C-like: -1) + # int(float) trick cannot be used, due to information loss + den = int(y) + num = int(self) + result_sign = 1 if (den * num) >= 0 else -1 + cls = self.__class__ if isinstance(y, moduint): cls = self.maxcast(y) - return cls(int(float(self.arg) / y.arg)) - else: - return self.__class__(int(float(self.arg) / y)) + return ((abs(num) / abs(den)) * result_sign) def __int__(self): return int(self.arg) @@ -64,11 +68,11 @@ class moduint(object): return self.__class__(self.arg << y) def __mod__(self, y): + # See __div__ for implementation choice + cls = self.__class__ if isinstance(y, moduint): cls = self.maxcast(y) - return cls(self.arg - (y.arg * int(float(self.arg)/y.arg))) - else: - return self.__class__(self.arg - (y * int(float(self.arg)/y))) + return cls(self.arg - (y * (self / y))) def __mul__(self, y): if isinstance(y, moduint): diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index fff707e5..2503e104 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -324,10 +324,20 @@ class CGen(object): return out def gen_goto_code(self, attrib, instr_offsets, dst): + if isinstance(dst, asm_label) and dst.offset is None: + # Generate goto for local labels + return ['goto %s;' % dst.name] + offset = None + if isinstance(dst, asm_label) and dst.offset is not None: + offset = dst.offset + elif isinstance(dst, (int, long)): + offset = dst out = [] - if isinstance(dst, asm_label): - out.append('goto %s;' % dst.name) - elif dst in instr_offsets: + if (offset is not None and + offset > attrib.instr.offset and + offset in instr_offsets): + # Only generate goto for next instructions. + # (consecutive instructions) lbl = self.ir_arch.symbol_pool.getby_offset_create(dst) out += self.gen_post_code(attrib) out += self.gen_post_instr_checks(attrib, dst) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 32d4764c..bc04689c 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -1050,15 +1050,18 @@ class LLVMFunction(): evaluated = self.add_ir(to_eval) return case2dst, evaluated - def gen_jump2dst(self, attrib, dst): + def gen_jump2dst(self, attrib, instr_offsets, dst): """Generate the code for a jump to @dst with final check for error Several cases have to be considered: - jump to an offset out of the current ASM BBL (JMP 0x11223344) - jump to an offset inside the current ASM BBL (Go to next instruction) + - jump to an offset back in the current ASM BBL (For max_exec jit + option on self loops) - jump to a generated IR label, which must be jitted in this same - function (REP MOVSB) - - jump to a computed offset (CALL @32[0x11223344]) + function (REP MOVSB) + - jump to a computed offset (CALL @32[0x11223344]) + """ PC = self.llvm_context.PC # We are no longer in the main stream, deactivate cache @@ -1070,18 +1073,27 @@ class LLVMFunction(): if m2_asmbloc.expr_is_label(dst): bbl = self.get_basic_bloc_by_label(dst) + offset = dst.name.offset if bbl is not None: # "local" jump, inside this function - if dst.name.offset is not None: + if offset is None: # Avoid checks on generated label + self.builder.branch(bbl) + return + + if (offset in instr_offsets and + offset > attrib.instr.offset): + # forward local jump (ie. next instruction) self.gen_post_code(attrib) - self.gen_post_instr_checks(attrib, dst.name.offset) - self.builder.branch(bbl) - return - else: - # "extern" jump on a defined offset, return to the caller - offset = dst.name.offset - dst = self.add_ir(m2_expr.ExprInt(offset, PC.size)) + self.gen_post_instr_checks(attrib, offset) + self.builder.branch(bbl) + return + + # reaching this point means a backward local jump, promote it to + # extern + + # "extern" jump on a defined offset, return to the caller + dst = self.add_ir(m2_expr.ExprInt(offset, PC.size)) # "extern" jump with a computed value, return to the caller assert isinstance(dst, (llvm_ir.Instruction, llvm_ir.Value)) @@ -1097,12 +1109,13 @@ class LLVMFunction(): self.set_ret(dst) - def gen_irblock(self, attrib, instr, irblock): + def gen_irblock(self, attrib, instr, instr_offsets, irblock): """ Generate the code for an @irblock @instr: the current instruction to translate @irblock: an irbloc instance @attrib: an Attributs instance + @instr_offsets: offset of all asmblock's instructions """ case2dst = None @@ -1158,7 +1171,7 @@ class LLVMFunction(): assert case2dst is not None if len(case2dst) == 1: # Avoid switch in this common case - self.gen_jump2dst(attrib, case2dst.values()[0]) + self.gen_jump2dst(attrib, instr_offsets, case2dst.values()[0]) else: current_bbl = self.builder.basic_block @@ -1170,7 +1183,7 @@ class LLVMFunction(): bbl = self.append_basic_block(name) case2bbl[case] = bbl self.builder.position_at_start(bbl) - self.gen_jump2dst(attrib, dst) + self.gen_jump2dst(attrib, instr_offsets, dst) # Jump on the correct output self.builder.position_at_end(current_bbl) @@ -1279,6 +1292,7 @@ class LLVMFunction(): # TODO: merge duplicate code with CGen codegen = self.llvm_context.cgen_class(self.llvm_context.ir_arch) irblocks_list = codegen.block2assignblks(asmblock) + instr_offsets = [line.offset for line in asmblock.lines] # Prepare for delayslot if self.llvm_context.has_delayslot: @@ -1288,11 +1302,13 @@ class LLVMFunction(): default_value=eltype(0)) self.local_vars_pointers[element.name] = ptr lbl = codegen.get_block_post_label(asmblock) + instr_offsets.append(lbl.offset) self.append_basic_block(lbl) # Add content builder.position_at_end(entry_bbl) + for instr, irblocks in zip(asmblock.lines, irblocks_list): attrib = codegen.get_attributes(instr, irblocks, self.log_mn, self.log_regs) @@ -1312,7 +1328,7 @@ class LLVMFunction(): if index == 0: self.gen_pre_code(attrib) - self.gen_irblock(attrib, instr, irblock) + self.gen_irblock(attrib, instr, instr_offsets, irblock) # Gen finalize (see codegen::CGen) is unrecheable, except with delayslot self.gen_finalize(asmblock, codegen) diff --git a/test/expression/modint.py b/test/expression/modint.py index e7c19d0c..17c12907 100644 --- a/test/expression/modint.py +++ b/test/expression/modint.py @@ -8,7 +8,7 @@ d = uint1(0) e = uint1(1) f = uint8(0x1) - +g = int8(-3) print a, b, c print a + b, a + c, b + c @@ -53,7 +53,20 @@ assert(f ^ f == 0) assert(f ^ 0 == f) assert(0 ^ f == f) assert(1 ^ f == 0) +assert(c / g == -1) +assert(c / -3 == -1) +assert(c % g == 1) +assert(c % -3 == 1) print e + c, c + e, c - e, e - c print 1000 * a print hex(a) + +define_int(128) +define_uint(128) +h = uint128(0x11223344556677889900AABBCCDDEEFF) +i = int128(-0x9900AABBCCDDEEFF1122334455667788) + +assert(i / h == 6) +assert(i % h == 0x3221aa32bb43cd58d9cc54dd65ee7e) + diff --git a/test/jitter/jit_options.py b/test/jitter/jit_options.py index cc955c64..4fe936d5 100644 --- a/test/jitter/jit_options.py +++ b/test/jitter/jit_options.py @@ -5,18 +5,18 @@ from miasm2.analysis.machine import Machine from pdb import pm # Shellcode - # main: -# MOV EAX, 0x1 +# MOV EAX, 0x10 +# MOV EBX, 0x1 # loop_main: -# CMP EAX, 0x10 -# JZ loop_end -# loop_inc: -# INC EAX -# JMP loop_main +# SUB EAX, 0x1 +# CMOVZ ECX, EBX +# JNZ loop_main # loop_end: # RET -data = "b80100000083f810740340ebf8c3".decode("hex") + + +data = "b810000000bb0100000083e8010f44cb75f8c3".decode("hex") run_addr = 0x40000000 def code_sentinelle(jitter): @@ -47,10 +47,10 @@ myjit.init_run(run_addr) myjit.continue_run() assert myjit.run is False -assert myjit.cpu.EAX == 0x10 +assert myjit.cpu.EAX == 0x0 ## Let's specify a max_exec_per_call -## 5: main, loop_main, loop_inc, loop_main, loop_inc +## 5: main/loop_main, loop_main myjit.jit.options["max_exec_per_call"] = 5 first_call = True @@ -71,8 +71,8 @@ myjit.exec_cb = cb myjit.continue_run() assert myjit.run is True -# Use a '<=' because it's a 'max_...' -assert myjit.cpu.EAX <= 3 +# Use a '>=' because it's a 'max_...' +assert myjit.cpu.EAX >= 0xA # Test 'jit_maxline' print "[+] Run instr one by one" @@ -91,7 +91,6 @@ myjit.exec_cb = cb myjit.continue_run() assert myjit.run is False -assert myjit.cpu.EAX == 0x10 -## dry(1) + main(1) + (loop_main(2) + loop_inc(2))*(0x10 - 1) + loop_main(2) + -## loop_end(1) = 65 -assert counter == 65 +assert myjit.cpu.EAX == 0x00 +## main(2) + (loop_main(3))*(0x10) + loop_end(1) + 0x1337beef (1) +assert counter == 52 |