about summary refs log tree commit diff stats
path: root/miasm2/arch
diff options
context:
space:
mode:
authorFabrice Desclaux <fabrice.desclaux@cea.fr>2015-04-01 15:58:29 +0200
committerFabrice Desclaux <fabrice.desclaux@cea.fr>2015-04-01 23:47:37 +0200
commit53d82c13f7da6851196e69c67841af24bcf218b2 (patch)
tree36a74eb31953b449544dfc6eedd8e61a1be7a5eb /miasm2/arch
parent5a6145c5ea3a1df1e666224962dc3ba685327a12 (diff)
downloadmiasm-53d82c13f7da6851196e69c67841af24bcf218b2.tar.gz
miasm-53d82c13f7da6851196e69c67841af24bcf218b2.zip
Cpu: modify instructions' offset relative encoding
The assembler will automatically use the instruction length in the offset computation

In the following instruction:
0x10: EB 02   JMP 0x14

If we assemble this instruction, the requested instruction sent to the assembler
engine will be:
JMP +0x4

And will be encoded to:
EB 02

Previously, the assembly of:
JMP +0x4
was:
EB 04
Diffstat (limited to '')
-rw-r--r--miasm2/arch/arm/arch.py61
-rw-r--r--miasm2/arch/mips32/arch.py8
-rw-r--r--miasm2/arch/msp430/arch.py21
-rw-r--r--miasm2/arch/msp430/ira.py2
-rw-r--r--miasm2/arch/x86/arch.py72
5 files changed, 107 insertions, 57 deletions
diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py
index 539b4778..a70718d9 100644
--- a/miasm2/arch/arm/arch.py
+++ b/miasm2/arch/arm/arch.py
@@ -363,9 +363,9 @@ class instruction_arm(instruction):
         if not isinstance(e, ExprInt):
             return
         if self.name == 'BLX':
-            ad = e.arg + 8 + self.offset
+            ad = e.arg + self.offset
         else:
-            ad = e.arg + 8 + self.offset
+            ad = e.arg + self.offset
         l = symbol_pool.getby_offset_create(ad)
         s = ExprId(l, e.size)
         self.args[0] = s
@@ -406,8 +406,7 @@ class instruction_arm(instruction):
         if not isinstance(e, ExprInt):
             log.debug('dyn dst %r', e)
             return
-        # Can't find the +4 reason in doc
-        off = e.arg - (self.offset + 4 + self.l)
+        off = e.arg - self.offset
         if int(off % 4):
             raise ValueError('strange offset! %r' % off)
         self.args[0] = ExprInt32(off)
@@ -438,9 +437,9 @@ class instruction_armt(instruction_arm):
         if not isinstance(e, ExprInt):
             return
         if self.name == 'BLX':
-            ad = e.arg + 4 + (self.offset & 0xfffffffc)
+            ad = e.arg + (self.offset & 0xfffffffc)
         else:
-            ad = e.arg + 4 + self.offset
+            ad = e.arg + self.offset
         l = symbol_pool.getby_offset_create(ad)
         s = ExprId(l, e.size)
         if self.name in ["CBZ", "CBNZ"]:
@@ -480,7 +479,7 @@ class instruction_armt(instruction_arm):
         # The first +2 is to compensate instruction len, but strangely, 32 bits
         # thumb2 instructions len is 2... For the second +2, didn't find it in
         # the doc.
-        off = e.arg - (self.offset + 2 + 2)
+        off = e.arg - self.offset
         if int(off % 2):
             raise ValueError('strange offset! %r' % off)
         self.args[0] = ExprInt32(off)
@@ -787,12 +786,17 @@ class arm_offs(arm_imm):
         return ExprInt_fromsize(self.intsize, v)
 
     def decodeval(self, v):
-        return v << 2
+        v <<= 2
+        # Add pipeline offset
+        v += 8
+        return v
 
     def encodeval(self, v):
-        if v%4 == 0:
-            return v >> 2
-        return False
+        if v%4 != 0:
+            return False
+        # Remove pipeline offset
+        v -= 8
+        return v >> 2
 
     def decode(self, v):
         v = v & self.lmask
@@ -1202,13 +1206,16 @@ class arm_offs_blx(arm_imm):
         v = v & self.lmask
         v = (v << 2) + (self.parent.lowb.value << 1)
         v = sign_ext(v, 26, 32)
+        # Add pipeline offset
+        v += 8
         self.expr = ExprInt32(v)
         return True
 
     def encode(self):
         if not isinstance(self.expr, ExprInt):
             return False
-        v = self.expr.arg.arg
+        # Remove pipeline offset
+        v = int(self.expr.arg - 8)
         if v & 0x80000000:
             v &= (1 << 26) - 1
         self.parent.lowb.value = (v >> 1) & 1
@@ -1635,28 +1642,17 @@ class arm_offsp(arm_offpc):
 class arm_offspc(arm_offs):
 
     def decodeval(self, v):
-        return v << 1
-
-    def encodeval(self, v):
-        return v >> 1
-
-
-class arm_offspchl(arm_offs):
-
-    def decodeval(self, v):
-        if self.parent.hl.value == 0:
-            return v << 12
-        else:
-            return v << 1
+        v = v << 1
+        # Add pipeline offset
+        v += 2 + 2
+        return v
 
     def encodeval(self, v):
-        if v > (1 << 12):
-            self.parent.hl.value = 0
-            v >>= 12
-        else:
-            self.parent.hl.value = 1
-            v >>= 1
-        return v
+        # Remove pipeline offset
+        v -= 2 + 2
+        if v % 2 == 0:
+            return v >> 1
+        return False
 
 
 class arm_off8sppc(arm_imm):
@@ -1907,7 +1903,6 @@ rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb')
 offs8 = bs(l=8, cls=(arm_offspc,), fname="offs")
 offs11 = bs(l=11, cls=(arm_offspc,), fname="offs")
 
-offs11hl = bs(l=11, cls=(arm_offspchl,), fname="offs")
 hl = bs(l=1, prio=default_prio + 1, fname='hl')
 off8sppc = bs(l=8, cls=(arm_off8sppc,), fname="off")
 
diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py
index 60b0f5d2..12f4ff8e 100644
--- a/miasm2/arch/mips32/arch.py
+++ b/miasm2/arch/mips32/arch.py
@@ -179,7 +179,7 @@ class instruction_mips32(cpu.instruction):
             raise ValueError('symbol not resolved %s' % self.l)
         if not isinstance(e, ExprInt):
             return
-        off = e.arg - (self.offset + self.l)
+        off = e.arg - self.offset
         print "diff", e, hex(self.offset)
         print hex(off)
         if int(off % 4):
@@ -327,13 +327,15 @@ class mips32_soff_noarg(mips32_imm):
         v = v & self.lmask
         v <<= 2
         v = cpu.sign_ext(v, 16+2, 32)
-        self.expr = ExprInt32(v)
+        # Add pipeline offset
+        self.expr = ExprInt32(v + 4)
         return True
 
     def encode(self):
         if not isinstance(self.expr, ExprInt):
             return False
-        v = self.expr.arg.arg
+        # Remove pipeline offset
+        v = int(self.expr.arg - 4)
         if v & 0x80000000:
             nv = v & ((1 << 16+2) - 1)
             assert( v == cpu.sign_ext(nv, 16+2, 32))
diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py
index 07a11ae8..2cac7260 100644
--- a/miasm2/arch/msp430/arch.py
+++ b/miasm2/arch/msp430/arch.py
@@ -144,7 +144,7 @@ class instruction_msp430(instruction):
         if self.name == "call":
             ad = e.arg
         else:
-            ad = e.arg + int(self.offset) + self.l
+            ad = e.arg + int(self.offset)
 
         l = symbol_pool.getby_offset_create(ad)
         s = ExprId(l, e.size)
@@ -188,7 +188,11 @@ class instruction_msp430(instruction):
             # raise ValueError('dst must be int or label')
             log.warning('dynamic dst %r', e)
             return
-        self.args[0] = ExprInt_fromsize(16, (e.arg - (self.offset + self.l))/2)
+
+        # Call argument is an absolute offset
+        # Other offsets are relative to instruction offset
+        if self.name != "call":
+            self.args[0] =  ExprInt_fromsize(16, e.arg - self.offset)
 
     def get_info(self, c):
         pass
@@ -522,9 +526,16 @@ class msp430_offs(imm_noarg, m_arg):
         return ExprInt_fromsize(16, v)
 
     def decodeval(self, v):
-        return v << 1
+        v <<= 1
+        v += self.parent.l
+        return v
 
     def encodeval(self, v):
+        plen = self.parent.l + self.l
+        assert(plen % 8 == 0)
+        v -= plen / 8
+        if v % 2 != 0:
+            return False
         return v >> 1
 
     def decode(self, v):
@@ -574,8 +585,8 @@ bs_f2_nobw = bs_name(l=3, name={'swpb': 1, 'sxt': 3,
                                 'call': 5})
 addop("f2_2", [bs('000100'), bs_f2_nobw, bs('0'), a_s, sreg, off_s])
 
-
-offimm = bs(l=10, cls=(msp430_offs,), fname="offs")
+# Offset must be decoded in last position to have final instruction len
+offimm = bs(l=10, cls=(msp430_offs,), fname="offs", order=-1)
 
 bs_f2_jcc = bs_name(l=3, name={'jnz': 0, 'jz': 1, 'jnc': 2, 'jc': 3, 'jn': 4,
                                'jge': 5, 'jl': 6, 'jmp': 7})
diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py
index ea8bdc2c..26a53a1e 100644
--- a/miasm2/arch/msp430/ira.py
+++ b/miasm2/arch/msp430/ira.py
@@ -65,7 +65,7 @@ class ir_a_msp430(ir_a_msp430_base):
             lbl = bloc.get_next()
             new_lbl = self.gen_label()
             irs = self.call_effects(pc_val)
-            irs.append([ExprAff(IRDst, ExprId(lbl, size=self.pc.size))])
+            irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))])
             nbloc = irbloc(new_lbl, irs)
             nbloc.lines = [l]
             self.blocs[new_lbl] = nbloc
diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 3b714f79..f168d9cb 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -491,7 +491,7 @@ class instruction_x86(instruction):
             if not isinstance(e.name, asm_label) and e not in all_regs_ids:
                 raise ValueError("ExprId must be a label or a register")
         elif isinstance(e, ExprInt):
-            ad = e.arg + int(self.offset) + self.l
+            ad = e.arg + int(self.offset)
             l = symbol_pool.getby_offset_create(ad)
             s = ExprId(l, e.size)
             self.args[0] = s
@@ -558,7 +558,7 @@ class instruction_x86(instruction):
             return
         # return ExprInt32(e.arg - (self.offset + self.l))
         self.args[0] = ExprInt_fromsize(
-            self.mode, e.arg - (self.offset + self.l))
+            self.mode, e.arg - self.offset)
 
     def get_info(self, c):
         self.additional_info.g1.value = c.g1.value
@@ -846,9 +846,9 @@ class mn_x86(cls_mn):
     def post_asm(self, v):
         return v
 
-    def encodefields(self, decoded):
-        v = super(mn_x86, self).encodefields(decoded)
 
+    def gen_prefix(self):
+        v = ""
         rex = 0x40
         if self.g1.value is None:
             self.g1.value = 0
@@ -890,9 +890,15 @@ class mn_x86(cls_mn):
             if hasattr(self, 'no_xmm_pref'):
                 return None
             v = "\x66" + v
-
         return v
 
+    def encodefields(self, decoded):
+        v = super(mn_x86, self).encodefields(decoded)
+        prefix = self.gen_prefix()
+        if prefix is None:
+            return None
+        return prefix + v
+
     def getnextflow(self, symbol_pool):
         raise NotImplementedError('not fully functional')
 
@@ -2834,19 +2840,54 @@ class bs_rel_off(bs_cond_imm):  # m_arg):
             # else:
             #    self.l = 32
         l = offsize(self.parent)
+        prefix = self.parent.gen_prefix()
+        parent_len = len(prefix) * 8 + self.parent.l + self.l
+        assert(parent_len % 8 == 0)
 
-        # l = self.parent.v_opmode()#self.parent.args[0].expr.size
-        # print 'imm enc', l, self.parent.rex_w.value
+        v = int(self.expr.arg - parent_len/8)
+        if prefix is None:
+            raise StopIteration
+        mask = ((1 << self.l) - 1)
+        if self.l > l:
+            raise StopIteration
+        if v != sign_ext(v & mask, self.l, l):
+            raise StopIteration
+        self.value = swap_uint(self.l, v & ((1 << self.l) - 1))
+        yield True
+
+    def decode(self, v):
+        v = swap_uint(self.l, v)
+        size = offsize(self.parent)
+        v = sign_ext(v, self.l, size)
+        v += self.parent.l
+        v = ExprInt_fromsize(size, v)
+        self.expr = v
+        return True
+
+class bs_s08(bs_rel_off):
+    parser = int_or_expr
+
+    @classmethod
+    def flen(cls, mode, v):
+        return 8
+
+    def encode(self):
+        if not isinstance(self.expr, ExprInt):
+            raise StopIteration
+        arg0_expr = self.parent.args[0].expr
+        if self.l != 0:
+            l = self.l
+        else:
+            l = self.parent.v_opmode()
+            self.l = l
+        l = offsize(self.parent)
         v = int(self.expr.arg)
         mask = ((1 << self.l) - 1)
-        # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1),
-        # self.l, l))
         if self.l > l:
             raise StopIteration
         if v != sign_ext(v & mask, self.l, l):
             raise StopIteration
         self.value = swap_uint(self.l, v & ((1 << self.l) - 1))
-        # print hex(self.value)
         yield True
 
     def decode(self, v):
@@ -2855,7 +2896,6 @@ class bs_rel_off(bs_cond_imm):  # m_arg):
         v = sign_ext(v, self.l, size)
         v = ExprInt_fromsize(size, v)
         self.expr = v
-        # print self.expr, repr(self.expr)
         return True
 
 
@@ -3002,7 +3042,6 @@ class bs_msegoff(m_arg):
         except StopIteration:
             return None, None
         e = v[0]
-        print "XXX", e
         if e is None:
             log.debug('cannot fromstring int %r', s)
             return None, None
@@ -3075,6 +3114,7 @@ sib_base = bs(l=3, cls=(bs_cond_index,), fname = "sib_base")
 
 disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp")
 
+s08 = bs(l=8, cls=(bs_s08, ))
 
 u08 = bs(l=8, cls=(x86_08, m_arg))
 u07 = bs(l=7, cls=(x86_08, m_arg))
@@ -3130,8 +3170,10 @@ d_ss = bs(l=0, cls=(bs_ss, ), fname='ss')
 d_fs = bs(l=0, cls=(bs_fs, ), fname='fs')
 d_gs = bs(l=0, cls=(bs_gs, ), fname='gs')
 
-rel_off = bs(l=0, cls=(bs_rel_off,), fname="off")
-rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off")
+# Offset must be decoded in last position to have final instruction len
+rel_off = bs(l=0, cls=(bs_rel_off,), fname="off", order=-1)
+# Offset must be decoded in last position to have final instruction len
+rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off", order=-1)
 moff = bs(l=0, cls=(bs_moff,), fname="off")
 msegoff = bs(l=16, cls=(bs_msegoff,), fname="mseg")
 movoff = bs(l=0, cls=(bs_movoff,), fname="off")
@@ -3737,7 +3779,7 @@ addop("prefetchnta", [bs8(0x0f), bs8(0x18)] + rmmod(d0, rm_arg_m08))
 
 addop("push", [bs8(0xff), stk] + rmmod(d6))
 addop("push", [bs("01010"), stk, reg])
-addop("push", [bs8(0x6a), rel_off08, stk])
+addop("push", [bs8(0x6a), s08, stk])
 addop("push", [bs8(0x68), d_imm, stk])
 addop("push", [bs8(0x0e), stk, d_cs])
 addop("push", [bs8(0x16), stk, d_ss])