about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.appveyor.yml11
-rw-r--r--.codespell_ignore2
-rw-r--r--.travis.yml6
-rw-r--r--README.md7
-rw-r--r--example/disasm/full.py12
-rw-r--r--example/jitter/unpack_upx.py48
-rw-r--r--miasm/analysis/data_flow.py1089
-rw-r--r--miasm/analysis/gdbserver.py4
-rw-r--r--miasm/analysis/sandbox.py3
-rw-r--r--miasm/analysis/simplifier.py33
-rw-r--r--miasm/arch/arm/arch.py58
-rw-r--r--miasm/arch/arm/regs.py65
-rw-r--r--miasm/arch/arm/sem.py242
-rw-r--r--miasm/arch/mep/sem.py2
-rw-r--r--miasm/arch/mips32/arch.py73
-rw-r--r--miasm/arch/mips32/regs.py28
-rw-r--r--miasm/arch/mips32/sem.py141
-rw-r--r--miasm/arch/ppc/arch.py40
-rw-r--r--miasm/arch/ppc/regs.py12
-rw-r--r--miasm/arch/ppc/sem.py50
-rw-r--r--miasm/arch/x86/arch.py9
-rw-r--r--miasm/arch/x86/sem.py4
-rw-r--r--miasm/core/asmblock.py158
-rw-r--r--miasm/core/bin_stream.py2
-rw-r--r--miasm/core/cpu.py3
-rw-r--r--miasm/core/utils.py12
-rw-r--r--miasm/expression/expression.py6
-rw-r--r--miasm/expression/simplifications.py2
-rw-r--r--miasm/expression/simplifications_common.py68
-rw-r--r--miasm/ir/ir.py2
-rw-r--r--miasm/ir/symbexec.py2
-rw-r--r--miasm/ir/translators/z3_ir.py2
-rw-r--r--miasm/jitter/arch/JitCore_mips32.h56
-rw-r--r--miasm/jitter/arch/JitCore_ppc32_regs.h69
-rw-r--r--miasm/jitter/bn.h2
-rw-r--r--miasm/jitter/jitcore.py5
-rw-r--r--miasm/jitter/jitcore_cc_base.py7
-rw-r--r--miasm/jitter/jitcore_gcc.py3
-rw-r--r--miasm/jitter/jitcore_llvm.py12
-rw-r--r--miasm/jitter/jitload.py22
-rw-r--r--miasm/jitter/loader/pe.py11
-rw-r--r--miasm/loader/elf_init.py2
-rw-r--r--miasm/loader/new_cstruct.py6
-rw-r--r--miasm/loader/pe.py40
-rw-r--r--miasm/os_dep/common.py8
-rw-r--r--miasm/os_dep/linux/syscall.py2
-rw-r--r--miasm/os_dep/win_api_x86_32.py414
-rw-r--r--miasm/os_dep/win_api_x86_32_seh.py21
-rw-r--r--miasm/runtime/divti3.c36
-rw-r--r--miasm/runtime/export.h10
-rw-r--r--miasm/runtime/int_endianness.h114
-rw-r--r--miasm/runtime/int_lib.h148
-rw-r--r--miasm/runtime/int_types.h174
-rw-r--r--miasm/runtime/int_util.h31
-rw-r--r--miasm/runtime/udivmodti4.c196
-rw-r--r--miasm/runtime/udivti3.c24
-rw-r--r--optional_requirements.txt2
-rw-r--r--setup.py61
-rw-r--r--test/arch/arm/arch.py9
-rw-r--r--test/arch/mips32/arch.py38
-rw-r--r--test/arch/ppc32/arch.py8
-rwxr-xr-xtest/test_all.py3
-rw-r--r--test/utils/testset.py2
63 files changed, 3021 insertions, 711 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 3023dc8f..a518626f 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -19,6 +19,17 @@ environment:
       PYTHON: c:\Python27-x64
       PYTHON_VERSION: "2.7.x"
 
+    - platform: Win32
+      APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
+      PLATFORM_TOOLSET: v141
+      PYTHON: c:\Python38
+      PYTHON_VERSION: "3.8.x"
+
+    - platform: x64
+      APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
+      PLATFORM_TOOLSET: v141
+      PYTHON: c:\Python38-x64
+      PYTHON_VERSION: "3.8.x"
       # on_finish:
       #  - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
 
diff --git a/.codespell_ignore b/.codespell_ignore
index 8b2b019f..5e7feec6 100644
--- a/.codespell_ignore
+++ b/.codespell_ignore
@@ -9,3 +9,5 @@ rela
 daa
 od
 blocs
+fpr
+seh
diff --git a/.travis.yml b/.travis.yml
index 3a9d4b07..d3ad7ee5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,8 @@ env:
     - MIASM_TEST_EXTRA_ARG="-o python -t qemu,long"
     - MIASM_TEST_EXTRA_ARG="-o z3"
     - MIASM_TEST_EXTRA_ARG="-o cparser"
-    - MIASM_EXTENTED_TESTS="ls_x64"
+    - MIASM_EXTENTED_TESTS_LS_X64="ls_x64"
+    - MIASM_EXTENTED_TESTS_LOADER="loader"
 before_script:
 - pip install -r optional_requirements.txt
 # codespell
@@ -36,4 +37,5 @@ before_script:
 - git clone https://github.com/cea-sec/miasm-extended-tests
 script:
   - test -z "$MIASM_TEST_EXTRA_ARG" || (cd test && flags=""; python --version |& grep -q "Python 3" || flags="-W error"; python $flags test_all.py $MIASM_TEST_EXTRA_ARG && git ls-files -o --exclude-standard)
-  - test -z "$MIASM_EXTENTED_TESTS" || (cd "miasm-extended-tests/$MIASM_EXTENTED_TESTS" && ./run.sh "$TRAVIS_BUILD_DIR")
+  - test -z "$MIASM_EXTENTED_TESTS_LS_X64" || (cd "miasm-extended-tests/$MIASM_EXTENTED_TESTS_LS_X64" && ./run.sh "$TRAVIS_BUILD_DIR")
+  - test -z "$MIASM_EXTENTED_TESTS_LOADER" || (cd "miasm-extended-tests/$MIASM_EXTENTED_TESTS_LOADER" && ./test_dll.py)
diff --git a/README.md b/README.md
index a4c55a20..f80d02a3 100644
--- a/README.md
+++ b/README.md
@@ -616,10 +616,3 @@ Books
 
 * [Practical Reverse Engineering: X86, X64, Arm, Windows Kernel, Reversing Tools, and Obfuscation](http://eu.wiley.com/WileyCDA/WileyTitle/productCd-1118787315,subjectCd-CSJ0.html): Introduction to Miasm (Chapter 5 "Obfuscation")
 * [BlackHat Python - Appendix](https://github.com/oreilly-japan/black-hat-python-jp-support/tree/master/appendix-A): Japan security book's samples
-
-
-Misc
-====
-
-* Man, does miasm has a link with rr0d?
-* Yes! crappy code and uggly documentation.
diff --git a/example/disasm/full.py b/example/disasm/full.py
index d4fae867..57263a6f 100644
--- a/example/disasm/full.py
+++ b/example/disasm/full.py
@@ -10,8 +10,7 @@ from miasm.core.asmblock import log_asmblock, AsmCFG
 from miasm.core.interval import interval
 from miasm.analysis.machine import Machine
 from miasm.analysis.data_flow import \
-    DiGraphDefUse, ReachingDefinitions, \
-    replace_stack_vars, load_from_int, del_unused_edges
+    DiGraphDefUse, ReachingDefinitions, load_from_int
 from miasm.expression.simplifications import expr_simp
 from miasm.analysis.ssa import SSADiGraph
 from miasm.ir.ir import AssignBlock, IRBlock
@@ -65,10 +64,6 @@ parser.add_argument('-p', "--ssa", action="store_true",
                     help="Generate the ssa form in  'ssa.dot'.")
 parser.add_argument('-x', "--propagexpr", action="store_true",
                     help="Do Expression propagation.")
-parser.add_argument('-y', "--stack2var", action="store_true",
-                    help="*Try* to do transform stack accesses into variables. "
-                    "Use only with --propagexpr option. "
-                    "WARNING: not reliable, may fail.")
 parser.add_argument('-e', "--loadint", action="store_true",
                     help="Load integers from binary in fixed memory lookup.")
 parser.add_argument('-j', "--calldontmodstack", action="store_true",
@@ -309,16 +304,11 @@ if args.propagexpr:
             ssa = self.do_simplify_loop(ssa, head)
             ircfg = self.ssa_to_unssa(ssa, head)
 
-            if args.stack2var:
-                replace_stack_vars(self.ir_arch, ircfg)
-
             ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
             ircfg_simplifier.deadremoval.add_expr_to_original_expr(ssa.ssa_variable_to_expr)
             ircfg_simplifier.simplify(ircfg, head)
             return ircfg
 
-
-
     head = list(entry_points)[0]
     simplifier = CustomIRCFGSimplifierSSA(ir_arch_a)
     ircfg = simplifier.simplify(ircfg_a, head)
diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py
index baa6f0bb..2527f0c4 100644
--- a/example/jitter/unpack_upx.py
+++ b/example/jitter/unpack_upx.py
@@ -1,9 +1,8 @@
 from __future__ import print_function
 import os
 import logging
-from pdb import pm
-from miasm.loader import pe
 from miasm.analysis.sandbox import Sandbox_Win_x86_32
+from miasm.jitter.loader.pe import vm2pe
 
 from miasm.os_dep.common import get_win_str_a
 
@@ -77,45 +76,30 @@ if options.verbose is True:
     print(sb.jitter.vm)
 
 
-def update_binary(jitter):
-    sb.pe.Opthdr.AddressOfEntryPoint = sb.pe.virt2rva(jitter.pc)
-    logging.info('updating binary')
-    for s in sb.pe.SHList:
-        sdata = sb.jitter.vm.get_mem(sb.pe.rva2virt(s.addr), s.rawsize)
-        sb.pe.rva.set(s.addr, sdata)
+def stop(jitter):
+    logging.info('OEP reached')
 
     # Stop execution
     jitter.run = False
     return False
 
 # Set callbacks
-sb.jitter.add_breakpoint(end_offset, update_binary)
+sb.jitter.add_breakpoint(end_offset, stop)
 
 # Run
 sb.run()
 
-# Rebuild PE
-# Alternative solution: miasm.jitter.loader.pe.vm2pe(sb.jitter, out_fname,
-# libs=sb.libs, e_orig=sb.pe)
-new_dll = []
-
-sb.pe.SHList.align_sections(0x1000, 0x1000)
-logging.info(repr(sb.pe.SHList))
-
-sb.pe.DirRes = pe.DirRes(sb.pe)
-sb.pe.DirImport.impdesc = None
-logging.info(repr(sb.pe.DirImport.impdesc))
-new_dll = sb.libs.gen_new_lib(sb.pe)
-logging.info(new_dll)
-sb.pe.DirImport.impdesc = []
-sb.pe.DirImport.add_dlldesc(new_dll)
-s_myimp = sb.pe.SHList.add_section(name="myimp", rawsize=len(sb.pe.DirImport))
-logging.info(repr(sb.pe.SHList))
-sb.pe.DirImport.set_rva(s_myimp.addr)
-
-# XXXX TODO
-sb.pe.NThdr.optentries[pe.DIRECTORY_ENTRY_DELAY_IMPORT].rva = 0
-
+# Construct the output filename
 bname, fname = os.path.split(options.filename)
 fname = os.path.join(bname, fname.replace('.', '_'))
-open(fname + '_unupx.bin', 'wb').write(bytes(sb.pe))
+out_fname = fname + '_unupx.bin'
+
+# Rebuild the PE thanks to `vm2pe`
+#
+# vm2pe will:
+# - set the new entry point to the current address (ie, the OEP)
+# - dump each section from the virtual memory into the new PE
+# - use `sb.libs` to generate a new import directory, and use it in the new PE
+# - save the resulting PE in `out_fname`
+
+vm2pe(sb.jitter, out_fname, libs=sb.libs, e_orig=sb.pe)
diff --git a/miasm/analysis/data_flow.py b/miasm/analysis/data_flow.py
index ef8a8cb0..7340c023 100644
--- a/miasm/analysis/data_flow.py
+++ b/miasm/analysis/data_flow.py
@@ -1,19 +1,21 @@
 """Data flow analysis based on miasm intermediate representation"""
 from builtins import range
-from collections import namedtuple
-
+from collections import namedtuple, Counter
+from pprint import pprint as pp
 from future.utils import viewitems, viewvalues
 from miasm.core.utils import encode_hex
 from miasm.core.graph import DiGraph
 from miasm.ir.ir import AssignBlock, IRBlock
 from miasm.expression.expression import ExprLoc, ExprMem, ExprId, ExprInt,\
-    ExprAssign, ExprOp, ExprWalk, is_function_call
-from miasm.expression.simplifications import expr_simp
+    ExprAssign, ExprOp, ExprWalk, ExprSlice, \
+    is_function_call, ExprVisitorCallbackBottomToTop
+from miasm.expression.simplifications import expr_simp, expr_simp_explicit
 from miasm.core.interval import interval
 from miasm.expression.expression_helper import possible_values
 from miasm.analysis.ssa import get_phi_sources_parent_block, \
     irblock_has_phi
-
+from miasm.ir.symbexec import get_expr_base_offset
+from collections import deque
 
 class ReachingDefinitions(dict):
     """
@@ -131,7 +133,7 @@ class DiGraphDefUse(DiGraph):
 
 
     def __init__(self, reaching_defs,
-                 deref_mem=False, *args, **kwargs):
+                 deref_mem=False, apply_simp=False, *args, **kwargs):
         """Instantiate a DiGraph
         @blocks: IR blocks
         """
@@ -144,7 +146,8 @@ class DiGraphDefUse(DiGraph):
 
         super(DiGraphDefUse, self).__init__(*args, **kwargs)
         self._compute_def_use(reaching_defs,
-                              deref_mem=deref_mem)
+                              deref_mem=deref_mem,
+                              apply_simp=apply_simp)
 
     def edge_attr(self, src, dst):
         """
@@ -155,18 +158,20 @@ class DiGraphDefUse(DiGraph):
         return self._edge_attr[(src, dst)]
 
     def _compute_def_use(self, reaching_defs,
-                         deref_mem=False):
+                         deref_mem=False, apply_simp=False):
         for block in viewvalues(self._blocks):
             self._compute_def_use_block(block,
                                         reaching_defs,
-                                        deref_mem=deref_mem)
+                                        deref_mem=deref_mem,
+                                        apply_simp=apply_simp)
 
-    def _compute_def_use_block(self, block, reaching_defs, deref_mem=False):
+    def _compute_def_use_block(self, block, reaching_defs, deref_mem=False, apply_simp=False):
         for index, assignblk in enumerate(block):
             assignblk_reaching_defs = reaching_defs.get_definitions(block.loc_key, index)
             for lval, expr in viewitems(assignblk):
                 self.add_node(AssignblkNode(block.loc_key, index, lval))
 
+                expr = expr_simp_explicit(expr) if apply_simp else expr
                 read_vars = expr.get_r(mem_read=deref_mem)
                 if deref_mem and lval.is_mem():
                     read_vars.update(lval.ptr.get_r(mem_read=deref_mem))
@@ -735,278 +740,6 @@ def expr_has_mem(expr):
     return visitor.visit(expr)
 
 
-class PropagateThroughExprId(object):
-    """
-    Propagate expressions though ExprId
-    """
-
-    def has_propagation_barrier(self, assignblks):
-        """
-        Return True if propagation cannot cross the @assignblks
-        @assignblks: list of AssignBlock to check
-        """
-        for assignblk in assignblks:
-            for dst, src in viewitems(assignblk):
-                if is_function_call(src):
-                    return True
-                if dst.is_mem():
-                    return True
-        return False
-
-    def is_mem_written(self, ssa, node_a, node_b):
-        """
-        Return True if memory is written at least once between @node_a and
-        @node_b
-
-        @node: AssignblkNode representing the start position
-        @successor: AssignblkNode representing the end position
-        """
-
-        block_b = ssa.graph.blocks[node_b.label]
-        nodes_to_do = self.compute_reachable_nodes_from_a_to_b(ssa.graph, node_a.label, node_b.label)
-
-        if node_a.label == node_b.label:
-            # src is dst
-            assert nodes_to_do == set([node_a.label])
-            if self.has_propagation_barrier(block_b.assignblks[node_a.index:node_b.index]):
-                return True
-        else:
-            # Check everyone but node_a.label and node_b.label
-            for loc in nodes_to_do - set([node_a.label, node_b.label]):
-                if loc not in ssa.graph.blocks:
-                    continue
-                block = ssa.graph.blocks[loc]
-                if self.has_propagation_barrier(block.assignblks):
-                    return True
-            # Check node_a.label partially
-            block_a = ssa.graph.blocks[node_a.label]
-            if self.has_propagation_barrier(block_a.assignblks[node_a.index:]):
-                return True
-            if nodes_to_do.intersection(ssa.graph.successors(node_b.label)):
-                # There is a path from node_b.label to node_b.label => Check node_b.label fully
-                if self.has_propagation_barrier(block_b.assignblks):
-                    return True
-            else:
-                # Check node_b.label partially
-                if self.has_propagation_barrier(block_b.assignblks[:node_b.index]):
-                    return True
-        return False
-
-    def compute_reachable_nodes_from_a_to_b(self, ssa, loc_a, loc_b):
-        reachables_a = set(ssa.reachable_sons(loc_a))
-        reachables_b = set(ssa.reachable_parents_stop_node(loc_b, loc_a))
-        return reachables_a.intersection(reachables_b)
-
-    def propagation_allowed(self, ssa, to_replace, node_a, node_b):
-        """
-        Return True if we can replace @node_a source present in @to_replace into
-        @node_b
-
-        @node_a: AssignblkNode position
-        @node_b: AssignblkNode position
-        """
-        if not expr_has_mem(to_replace[node_a.var]):
-            return True
-        if self.is_mem_written(ssa, node_a, node_b):
-            return False
-        return True
-
-
-    def get_var_definitions(self, ssa):
-        """
-        Return a dictionary linking variable to its assignment location
-        @ssa: SSADiGraph instance
-        """
-        ircfg = ssa.graph
-        def_dct = {}
-        for node in ircfg.nodes():
-            block = ircfg.blocks.get(node, None)
-            if block is None:
-                continue
-            for index, assignblk in enumerate(block):
-                for dst, src in viewitems(assignblk):
-                    if not dst.is_id():
-                        continue
-                    if dst in ssa.immutable_ids:
-                        continue
-                    assert dst not in def_dct
-                    def_dct[dst] = node, index
-        return def_dct
-
-    def get_candidates(self, ssa, head, max_expr_depth):
-        def_dct = self.get_var_definitions(ssa)
-        defuse = SSADefUse.from_ssa(ssa)
-        to_replace = {}
-        node_to_reg = {}
-        for node in defuse.nodes():
-            if node.var in ssa.immutable_ids:
-                continue
-            src = defuse.get_node_target(node)
-            if max_expr_depth is not None and len(str(src)) > max_expr_depth:
-                continue
-            if is_function_call(src):
-                continue
-            if node.var.is_mem():
-                continue
-            if src.is_op('Phi'):
-                continue
-            to_replace[node.var] = src
-            node_to_reg[node] = node.var
-        return node_to_reg, to_replace, defuse
-
-    def propagate(self, ssa, head, max_expr_depth=None):
-        """
-        Do expression propagation
-        @ssa: SSADiGraph instance
-        @head: the head location of the graph
-        @max_expr_depth: the maximum allowed depth of an expression
-        """
-        node_to_reg, to_replace, defuse = self.get_candidates(ssa, head, max_expr_depth)
-        modified = False
-        for node, reg in viewitems(node_to_reg):
-            for successor in defuse.successors(node):
-                if not self.propagation_allowed(ssa, to_replace, node, successor):
-                    continue
-
-                node_a = node
-                node_b = successor
-                block = ssa.graph.blocks[node_b.label]
-
-                replace = {node_a.var: to_replace[node_a.var]}
-                # Replace
-                assignblks = list(block)
-                assignblk = block[node_b.index]
-                out = {}
-                for dst, src in viewitems(assignblk):
-                    if src.is_op('Phi'):
-                        out[dst] = src
-                        continue
-
-                    if src.is_mem():
-                        ptr = src.ptr.replace_expr(replace)
-                        new_src = ExprMem(ptr, src.size)
-                    else:
-                        new_src = src.replace_expr(replace)
-
-                    if dst.is_id():
-                        new_dst = dst
-                    elif dst.is_mem():
-                        ptr = dst.ptr.replace_expr(replace)
-                        new_dst = ExprMem(ptr, dst.size)
-                    else:
-                        new_dst = dst.replace_expr(replace)
-                        if not (new_dst.is_id() or new_dst.is_mem()):
-                            new_dst = dst
-                    if src != new_src or dst != new_dst:
-                        modified = True
-                    out[new_dst] = new_src
-                out = AssignBlock(out, assignblk.instr)
-                assignblks[node_b.index] = out
-                new_block = IRBlock(block.loc_key, assignblks)
-                ssa.graph.blocks[block.loc_key] = new_block
-
-        return modified
-
-
-
-class PropagateExprIntThroughExprId(PropagateThroughExprId):
-    """
-    Propagate ExprInt though ExprId: classic constant propagation
-    This is a sub family of PropagateThroughExprId.
-    It reduces leaves in expressions of a program.
-    """
-
-    def get_candidates(self, ssa, head, max_expr_depth):
-        defuse = SSADefUse.from_ssa(ssa)
-
-        to_replace = {}
-        node_to_reg = {}
-        for node in defuse.nodes():
-            src = defuse.get_node_target(node)
-            if not src.is_int():
-                continue
-            if is_function_call(src):
-                continue
-            if node.var.is_mem():
-                continue
-            to_replace[node.var] = src
-            node_to_reg[node] = node.var
-        return node_to_reg, to_replace, defuse
-
-    def propagation_allowed(self, ssa, to_replace, node_a, node_b):
-        """
-        Propagating ExprInt is always ok
-        """
-        return True
-
-
-class PropagateThroughExprMem(object):
-    """
-    Propagate through ExprMem in very simple cases:
-    - if no memory write between source and target
-    - if source does not contain any memory reference
-    """
-
-    def propagate(self, ssa, head, max_expr_depth=None):
-        ircfg = ssa.graph
-        todo = set()
-        modified = False
-        for block in viewvalues(ircfg.blocks):
-            for i, assignblk in enumerate(block):
-                for dst, src in viewitems(assignblk):
-                    if not dst.is_mem():
-                        continue
-                    if expr_has_mem(src):
-                        continue
-                    todo.add((block.loc_key, i + 1, dst, src))
-                    ptr = dst.ptr
-                    for size in range(8, dst.size, 8):
-                        todo.add((block.loc_key, i + 1, ExprMem(ptr, size), src[:size]))
-
-        while todo:
-            loc_key, index, mem_dst, mem_src = todo.pop()
-            block = ircfg.blocks.get(loc_key, None)
-            if block is None:
-                continue
-            assignblks = list(block)
-            block_modified = False
-            for i in range(index, len(block)):
-                assignblk = block[i]
-                write_mem = False
-                assignblk_modified = False
-                out = dict(assignblk)
-                out_new = {}
-                for dst, src in viewitems(out):
-                    if dst.is_mem():
-                        write_mem = True
-                        ptr = dst.ptr.replace_expr({mem_dst:mem_src})
-                        dst = ExprMem(ptr, dst.size)
-                    src = src.replace_expr({mem_dst:mem_src})
-                    out_new[dst] = src
-                if out != out_new:
-                    assignblk_modified = True
-
-                if assignblk_modified:
-                    assignblks[i] = AssignBlock(out_new, assignblk.instr)
-                    block_modified = True
-                if write_mem:
-                    break
-            else:
-                # If no memory written, we may propagate to sons
-                # if son has only parent
-                for successor in ircfg.successors(loc_key):
-                    predecessors = ircfg.predecessors(successor)
-                    if len(predecessors) != 1:
-                        continue
-                    todo.add((successor, 0, mem_dst, mem_src))
-
-            if block_modified:
-                modified = True
-                new_block = IRBlock(block.loc_key, assignblks)
-                ircfg.blocks[block.loc_key] = new_block
-        return modified
-
-
 def stack_to_reg(expr):
     if expr.is_mem():
         ptr = expr.arg
@@ -1683,3 +1416,795 @@ class DiGraphLivenessSSA(DiGraphLivenessIRA):
 
         parent_block.infos[-1].var_out = var_info
         todo.add(parent)
+
+
+def get_phi_sources(phi_src, phi_dsts, ids_to_src):
+    """
+    Return False if the @phi_src has more than one non-phi source
+    Else, return its source
+    @ids_to_src: Dictionary linking phi source to its definition
+    """
+    true_values = set()
+    for src in phi_src.args:
+        if src in phi_dsts:
+            # Source is phi dst => skip
+            continue
+        true_src = ids_to_src[src]
+        if true_src in phi_dsts:
+            # Source is phi dst => skip
+            continue
+        # Check if src is not also a phi
+        if true_src.is_op('Phi'):
+            phi_dsts.add(src)
+            true_src = get_phi_sources(true_src, phi_dsts, ids_to_src)
+        if true_src is False:
+            return False
+        if true_src is True:
+            continue
+        true_values.add(true_src)
+        if len(true_values) != 1:
+            return False
+    if not true_values:
+        return True
+    if len(true_values) != 1:
+        return False
+    true_value = true_values.pop()
+    return true_value
+
+
+class DelDummyPhi(object):
+    """
+    Del dummy phi
+    """
+
+    def del_dummy_phi(self, ssa, head):
+        ids_to_src = {}
+        for block in viewvalues(ssa.graph.blocks):
+            for index, assignblock in enumerate(block):
+                for dst, src in viewitems(assignblock):
+                    if not dst.is_id():
+                        continue
+                    ids_to_src[dst] = src
+
+        modified = False
+        for block in ssa.graph.blocks.values():
+            if not irblock_has_phi(block):
+                continue
+            assignblk = block[0]
+            modified_assignblk = False
+            for dst, phi_src in viewitems(assignblk):
+                assert phi_src.is_op('Phi')
+                true_value = get_phi_sources(phi_src, set([dst]), ids_to_src)
+                if true_value is False:
+                    continue
+                if expr_has_mem(true_value):
+                    continue
+                fixed_phis = {}
+                for old_dst, old_phi_src in viewitems(assignblk):
+                    if old_dst == dst:
+                        continue
+                    fixed_phis[old_dst] = old_phi_src
+
+                modified = True
+
+                assignblks = list(block)
+                assignblks[0] = AssignBlock(fixed_phis, assignblk.instr)
+                assignblks[1:1] = [AssignBlock({dst: true_value}, assignblk.instr)]
+                new_irblock = IRBlock(block.loc_key, assignblks)
+                ssa.graph.blocks[block.loc_key] = new_irblock
+
+        return modified
+
+
+def replace_expr_from_bottom(expr_orig, dct):
+    def replace(expr):
+        if expr in dct:
+            return dct[expr]
+        return expr
+    visitor = ExprVisitorCallbackBottomToTop(lambda expr:replace(expr))
+    return visitor.visit(expr_orig)
+
+
+def is_mem_sub_part(needle, mem):
+    """
+    If @needle is a sub part of @mem, return the offset of @needle in @mem
+    Else, return False
+    @needle: ExprMem
+    @mem: ExprMem
+    """
+    ptr_base_a, ptr_offset_a = get_expr_base_offset(needle.ptr)
+    ptr_base_b, ptr_offset_b = get_expr_base_offset(mem.ptr)
+    if ptr_base_a != ptr_base_b:
+        return False
+    # Test if sub part starts inside mem
+    if not (ptr_offset_b <= ptr_offset_a < ptr_offset_b + mem.size // 8):
+        return False
+    # Test if sub part ends inside mem
+    if not (ptr_offset_a + needle.size // 8 <= ptr_offset_b + mem.size // 8):
+        return False
+    return ptr_offset_a - ptr_offset_b
+
+class UnionFind(object):
+    """
+    Implementation of UnionFind structure
+    __classes: a list of Set of equivalent elements
+    node_to_class: Dictionary linking an element to its equivalence class
+    order: Dictionary linking an element to its weight
+
+    The order attribute is used to allow the selection of a representative
+    element of an equivalence class
+    """
+
+    def __init__(self):
+        self.index = 0
+        self.__classes = []
+        self.node_to_class = {}
+        self.order = dict()
+
+    def copy(self):
+        """
+        Return a copy of the object
+        """
+        unionfind = UnionFind()
+        unionfind.index = self.index
+        unionfind.__classes = [set(known_class) for known_class in self.__classes]
+        node_to_class = {}
+        for class_eq in unionfind.__classes:
+            for node in class_eq:
+                node_to_class[node] = class_eq
+        unionfind.node_to_class = node_to_class
+        unionfind.order = dict(self.order)
+        return unionfind
+
+    def replace_node(self, old_node, new_node):
+        """
+        Replace the @old_node by the @new_node
+        """
+        classes = self.get_classes()
+        node_to_class = dict(self.node_to_class)
+
+        new_classes = []
+        replace_dct = {old_node:new_node}
+        for eq_class in classes:
+            new_class = set()
+            for node in eq_class:
+                new_class.add(replace_expr_from_bottom(node, replace_dct))
+            new_classes.append(new_class)
+
+        node_to_class = {}
+        for class_eq in new_classes:
+            for node in class_eq:
+                node_to_class[node] = class_eq
+        self.__classes = new_classes
+        self.node_to_class = node_to_class
+        new_order = dict()
+        for node,index in self.order.items():
+            new_node = replace_expr_from_bottom(node, replace_dct)
+            new_order[new_node] = index
+        self.order = new_order
+
+    def get_classes(self):
+        """
+        Return a list of the equivalent classes
+        """
+        classes = []
+        for class_tmp in self.__classes:
+            classes.append(set(class_tmp))
+        return classes
+
+    def nodes(self):
+        for known_class in self.__classes:
+            for node in known_class:
+                yield node
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if self.__class__ is not other.__class__:
+            return False
+
+        return Counter(frozenset(known_class) for known_class in self.__classes) == Counter(frozenset(known_class) for known_class in other.__classes)
+
+    def __ne__(self, other):
+        # Required for Python 2 (e.g. 2.7.14), which does not derive __ne__ from __eq__
+        return not self == other
+
+    def __str__(self):
+        components = self.__classes
+        out = ['UnionFind<']
+        for component in components:
+            out.append("\t" + (", ".join([str(node) for node in component])))
+        out.append('>')
+        return "\n".join(out)
+
+    def add_equivalence(self, node_a, node_b):
+        """
+        Add the new equivalence @node_a == @node_b
+        @node_a is equivalent to @node_b, but @node_b is more representative
+        than @node_a
+        """
+        if node_b not in self.order:
+            self.order[node_b] = self.index
+            self.index += 1
+        # As node_a is destination, we always replace its index
+        self.order[node_a] = self.index
+        self.index += 1
+
+        if node_a not in self.node_to_class and node_b not in self.node_to_class:
+            new_class = set([node_a, node_b])
+            self.node_to_class[node_a] = new_class
+            self.node_to_class[node_b] = new_class
+            self.__classes.append(new_class)
+        elif node_a in self.node_to_class and node_b not in self.node_to_class:
+            known_class = self.node_to_class[node_a]
+            known_class.add(node_b)
+            self.node_to_class[node_b] = known_class
+        elif node_a not in self.node_to_class and node_b in self.node_to_class:
+            known_class = self.node_to_class[node_b]
+            known_class.add(node_a)
+            self.node_to_class[node_a] = known_class
+        else:
+            raise RuntimeError("Two nodes cannot be in two classes")
+
+    def _get_master(self, node):
+        if node not in self.node_to_class:
+            return None
+        known_class = self.node_to_class[node]
+        best_node = node
+        for node in known_class:
+            if self.order[node] < self.order[best_node]:
+                best_node = node
+        return best_node
+
+    def get_master(self, node):
+        """
+        Return the representative element of the equivalence class containing
+        @node
+        @node: ExprMem or ExprId
+        """
+        if not node.is_mem():
+            return self._get_master(node)
+        if node in self.node_to_class:
+            # Full expr mem is known
+            return self._get_master(node)
+        # Test if mem is sub part of known node
+        for expr in self.node_to_class:
+            if not expr.is_mem():
+                continue
+            ret = is_mem_sub_part(node, expr)
+            if ret is False:
+                continue
+            master = self._get_master(expr)
+            master = master[ret * 8 : ret * 8 + node.size]
+            return master
+
+        return self._get_master(node)
+
+
+    def del_element(self, node):
+        """
+        Remove @node from the equivalence classes
+        """
+        assert node in self.node_to_class
+        known_class = self.node_to_class[node]
+        known_class.discard(node)
+        del(self.node_to_class[node])
+        del(self.order[node])
+
+    def del_get_new_master(self, node):
+        """
+        Remove @node from the equivalence classes and return its representative
+        equivalent element
+        @node: Element to delete
+        """
+        if node not in self.node_to_class:
+            return None
+        known_class = self.node_to_class[node]
+        known_class.discard(node)
+        del(self.node_to_class[node])
+        del(self.order[node])
+
+        if not known_class:
+            return None
+        best_node = list(known_class)[0]
+        for node in known_class:
+            if self.order[node] < self.order[best_node]:
+                best_node = node
+        return best_node
+
+class ExprToGraph(ExprWalk):
+    """
+    Transform an Expression into a tree and add link nodes to an existing tree
+    """
+    def __init__(self, graph):
+        super(ExprToGraph, self).__init__(self.link_nodes)
+        self.graph = graph
+
+    def link_nodes(self, expr, *args, **kwargs):
+        """
+        Transform an Expression @expr into a tree and add link nodes to the
+        current tree
+        @expr: Expression
+        """
+        if expr in self.graph.nodes():
+            return None
+        self.graph.add_node(expr)
+        if expr.is_mem():
+            self.graph.add_uniq_edge(expr, expr.ptr)
+        elif expr.is_slice():
+            self.graph.add_uniq_edge(expr, expr.arg)
+        elif expr.is_cond():
+            self.graph.add_uniq_edge(expr, expr.cond)
+            self.graph.add_uniq_edge(expr, expr.src1)
+            self.graph.add_uniq_edge(expr, expr.src2)
+        elif expr.is_compose():
+            for arg in expr.args:
+                self.graph.add_uniq_edge(expr, arg)
+        elif expr.is_op():
+            for arg in expr.args:
+                self.graph.add_uniq_edge(expr, arg)
+        return None
+
+class State(object):
+    """
+    Object representing the state of a program at a given point
+    The state is represented using equivalence classes
+
+    Each assignment can create/destroy equivalence classes. Interferences
+    between expressions are computed using the `may_interfer` method
+    """
+
+    def __init__(self):
+        self.equivalence_classes = UnionFind()
+        self.undefined = set()
+
+    def copy(self):
+        state = self.__class__()
+        state.equivalence_classes = self.equivalence_classes.copy()
+        state.undefined = self.undefined.copy()
+        return state
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if self.__class__ is not other.__class__:
+            return False
+        return (
+            set(self.equivalence_classes.nodes()) == set(other.equivalence_classes.nodes()) and
+            sorted(self.equivalence_classes.edges()) == sorted(other.equivalence_classes.edges()) and
+            self.undefined == other.undefined
+        )
+
+    def __ne__(self, other):
+        # Required for Python 2 (e.g. 2.7.14): __ne__ is not derived from __eq__
+        return not self == other
+
+    def may_interfer(self, dsts, src):
+        """
+        Return True if @src may interfere with expressions in @dsts
+        @dsts: Set of Expressions
+        @src: expression to test
+        """
+
+        srcs = src.get_r()
+        for src in srcs:
+            for dst in dsts:
+                if dst in src:
+                    return True
+                if dst.is_mem() and src.is_mem():
+                    base1, offset1 = get_expr_base_offset(dst.ptr)
+                    base2, offset2 = get_expr_base_offset(src.ptr)
+                    if base1 != base2:
+                        return True
+                    assert offset1 + dst.size // 8 - 1 <= int(base1.mask)
+                    assert offset2 + src.size // 8 - 1 <= int(base2.mask)
+                    interval1 = interval([(offset1, offset1 + dst.size // 8 - 1)])
+                    interval2 = interval([(offset2, offset2 + src.size // 8 - 1)])
+                    if (interval1 & interval2).empty:
+                        continue
+                    return True
+        return False
+
+    def _get_representative_expr(self, expr):
+        representative = self.equivalence_classes.get_master(expr)
+        if representative is None:
+            return expr
+        return representative
+
+    def get_representative_expr(self, expr):
+        """
+        Replace each sub expression of @expr by its representative element
+        @expr: Expression to analyse
+        """
+        new_expr = expr.visit(self._get_representative_expr)
+        return new_expr
+
+    def propagation_allowed(self, expr):
+        """
+        Return True if @expr can be propagated
+        Don't propagate:
+        - Phi nodes
+        - call_func_ret / call_func_stack operations
+        """
+
+        if (
+                expr.is_op('Phi') or
+                (expr.is_op() and expr.op.startswith("call_func"))
+        ):
+            return False
+        return True
+
+    def eval_assignblock(self, assignblock):
+        """
+        Evaluate the @assignblock on the current state
+        @assignblock: AssignBlock instance
+        """
+
+        out = dict(assignblock.items())
+        new_out = dict()
+        # Replace sub expressions by their equivalence class representative
+        for dst, src in out.items():
+            if src.is_op('Phi'):
+                # Don't replace in phi
+                new_src = src
+            else:
+                new_src = self.get_representative_expr(src)
+            if dst.is_mem():
+                new_ptr = self.get_representative_expr(dst.ptr)
+                new_dst = ExprMem(new_ptr, dst.size)
+            else:
+                new_dst = dst
+            new_dst = expr_simp(new_dst)
+            new_src = expr_simp(new_src)
+            new_out[new_dst] = new_src
+
+        # For each destination, update (or delete) dependent nodes according to
+        # equivalence classes
+        classes = self.equivalence_classes
+
+        for dst in new_out:
+
+            replacement = classes.del_get_new_master(dst)
+            if replacement is None:
+                to_del = set([dst])
+                to_replace = {}
+            else:
+                to_del = set()
+                to_replace = {dst:replacement}
+
+            graph = DiGraph()
+            # Build an expression graph linking all classes
+            has_parents = False
+            for node in classes.nodes():
+                if dst in node:
+                    # Only dependent nodes are interesting here
+                    has_parents = True
+                    expr_to_graph = ExprToGraph(graph)
+                    expr_to_graph.visit(node)
+
+            if not has_parents:
+                continue
+
+            todo = graph.leaves()
+            done = set()
+
+            while todo:
+                node = todo.pop(0)
+                if node in done:
+                    continue
+                # If at least one son is not done, re do later
+                if [son for son in graph.successors(node) if son not in done]:
+                    todo.append(node)
+                    continue
+                done.add(node)
+
+                # If at least one son cannot be replaced (deleted), our last
+                # chance is to have an equivalence
+                if any(son in to_del for son in graph.successors(node)):
+                    # One son has been deleted!
+                    # Try to find a replacement of the whole expression
+                    replacement = classes.del_get_new_master(node)
+                    if replacement is None:
+                        to_del.add(node)
+                        for predecessor in graph.predecessors(node):
+                            if predecessor not in todo:
+                                todo.append(predecessor)
+                        continue
+                    else:
+                        to_replace[node] = replacement
+                        # Continue with replacement
+
+                # Every son is live or has been replaced
+                new_node = node.replace_expr(to_replace)
+
+                if new_node == node:
+                    # If node is not touched (Ex: leaf node)
+                    for predecessor in graph.predecessors(node):
+                        if predecessor not in todo:
+                            todo.append(predecessor)
+                    continue
+
+                # Node has been modified, update equivalence classes
+                classes.replace_node(node, new_node)
+                to_replace[node] = new_node
+
+                for predecessor in graph.predecessors(node):
+                    if predecessor not in todo:
+                        todo.append(predecessor)
+
+                continue
+
+        new_assignblk = AssignBlock(new_out, assignblock.instr)
+        dsts = new_out.keys()
+
+        # Remove interfering known classes
+        to_del = set()
+        for node in list(classes.nodes()):
+            if self.may_interfer(dsts, node):
+                # Interfer with known equivalence class
+                self.equivalence_classes.del_element(node)
+                if node.is_id() or node.is_mem():
+                    self.undefined.add(node)
+
+
+        # Update equivalence classes
+        for dst, src in new_out.items():
+            # Delete equivalence class interfering with dst
+            to_del = set()
+            classes = self.equivalence_classes
+            for node in classes.nodes():
+                if dst in node:
+                    to_del.add(node)
+            for node in to_del:
+                self.equivalence_classes.del_element(node)
+                if node.is_id() or node.is_mem():
+                    self.undefined.add(node)
+
+            # Don't create equivalence if self interfer
+            if self.may_interfer(dsts, src):
+                if dst in self.equivalence_classes.nodes():
+                    self.equivalence_classes.del_element(dst)
+                    if dst.is_id() or dst.is_mem():
+                        self.undefined.add(dst)
+                continue
+
+            if not self.propagation_allowed(src):
+                continue
+
+            # Don't create equivalence if dst pointer may interfere with undefined
+            if dst.is_mem() and self.may_interfer(self.undefined, dst.ptr):
+                continue
+
+            self.undefined.discard(dst)
+            if dst in self.equivalence_classes.nodes():
+                self.equivalence_classes.del_element(dst)
+            self.equivalence_classes.add_equivalence(dst, src)
+
+        return new_assignblk
+
+
+    def merge(self, other):
+        """
+        Merge the current state with @other
+        @other: State instance
+        """
+        classes1 = self.equivalence_classes
+        classes2 = other.equivalence_classes
+
+        undefined = set(node for node in self.undefined if node.is_id() or node.is_mem())
+        undefined.update(set(node for node in other.undefined if node.is_id() or node.is_mem()))
+        # Should we compute interference between srcs and undefined?
+        # No => they should already interfere in the other state
+        components1 = classes1.get_classes()
+        components2 = classes2.get_classes()
+
+        node_to_component2 = {}
+        for component in components2:
+            for node in component:
+                node_to_component2[node] = component
+
+        out = []
+        nodes_ok = set()
+        while components1:
+            component1 = components1.pop()
+            new_component1 = set()
+            for node in component1:
+                if node in undefined:
+                    continue
+                component2 = node_to_component2.get(node)
+                if component2 is None:
+                    if node.is_id() or node.is_mem():
+                        assert(node not in nodes_ok)
+                        undefined.add(node)
+                    continue
+                if node not in component2:
+                    continue
+                common = component1.intersection(component2)
+                if len(common) == 1:
+                    if node.is_id() or node.is_mem():
+                        assert(node not in nodes_ok)
+                        undefined.add(node)
+                        component2.discard(common.pop())
+                    continue
+                if common:
+                    nodes_ok.update(common)
+                    out.append(common)
+                diff = component1.difference(common)
+                if diff:
+                    components1.append(diff)
+                component2.difference_update(common)
+                break
+
+        # Discard remaining components2 elements
+        for component in components2:
+            for node in component:
+                if node.is_id() or node.is_mem():
+                    assert(node not in nodes_ok)
+                    undefined.add(node)
+
+        all_nodes = set()
+        for common in out:
+            all_nodes.update(common)
+
+        new_order = dict(
+            (node, index) for (node, index) in classes1.order.items()
+            if node in all_nodes
+        )
+
+        unionfind = UnionFind()
+        new_classes = []
+        global_max_index = 0
+        for common in out:
+            min_index = None
+            master = None
+            for node in common:
+                index = new_order[node]
+                global_max_index = max(index, global_max_index)
+                if min_index is None or min_index > index:
+                    min_index = index
+                    master = node
+            for node in common:
+                if node == master:
+                    continue
+                unionfind.add_equivalence(node, master)
+
+        unionfind.index = global_max_index
+        unionfind.order = new_order
+        state = self.__class__()
+        state.equivalence_classes = unionfind
+        state.undefined = undefined
+
+        return state
+
+
+class PropagateExpressions(object):
+    """
+    Propagate expressions
+
+    The algorithm propagates equivalence classes expressions from the entry
+    point. During the analysis, we replace source nodes by their equivalence
+    class representative. Equivalence classes can be modified during the
+    analysis due to memory aliasing.
+
+    For example:
+    B = A+1
+    C = A
+    A = 6
+    D = [B]
+
+    Will result in:
+    B = A+1
+    C = A
+    A = 6
+    D = [C+1]
+    """
+
+    @staticmethod
+    def new_state():
+        return State()
+
+    def merge_prev_states(self, ircfg, states, loc_key):
+        """
+        Merge predecessors states of irblock at location @loc_key
+        @ircfg: IRCfg instance
+        @states: Dictionary linking locations to state
+        @loc_key: location of the current irblock
+        """
+
+        prev_states = []
+        for predecessor in ircfg.predecessors(loc_key):
+            prev_states.append((predecessor, states[predecessor]))
+
+        filtered_prev_states = []
+        for (_, prev_state) in prev_states:
+            if prev_state is not None:
+                filtered_prev_states.append(prev_state)
+
+        prev_states = filtered_prev_states
+        if not prev_states:
+            state = self.new_state()
+        elif len(prev_states) == 1:
+            state = prev_states[0].copy()
+        else:
+            while prev_states:
+                state = prev_states.pop()
+                if state is not None:
+                    break
+            for prev_state in prev_states:
+                state = state.merge(prev_state)
+
+        return state
+
+    def update_state(self, irblock, state):
+        """
+        Propagate the @state through the @irblock
+        @irblock: IRBlock instance
+        @state: State instance
+        """
+        new_assignblocks = []
+        modified = False
+
+        for index, assignblock in enumerate(irblock):
+            if not assignblock.items():
+                continue
+            new_assignblk = state.eval_assignblock(assignblock)
+            new_assignblocks.append(new_assignblk)
+            if new_assignblk != assignblock:
+                modified = True
+
+        new_irblock = IRBlock(irblock.loc_key, new_assignblocks)
+
+        return new_irblock, modified
+
+    def propagate(self, ssa, head, max_expr_depth=None):
+        """
+        Apply algorithm on the @ssa graph
+        """
+        ircfg = ssa.ircfg
+        self.loc_db = ircfg.loc_db
+        irblocks = ssa.ircfg.blocks
+        states = {}
+        for loc_key, irblock in irblocks.items():
+            states[loc_key] = None
+
+        todo = deque([head])
+        while todo:
+            loc_key = todo.popleft()
+            irblock = irblocks.get(loc_key)
+            if irblock is None:
+                continue
+
+            state_orig = states[loc_key]
+            state = self.merge_prev_states(ircfg, states, loc_key)
+            state = state.copy()
+
+            new_irblock, modified_irblock = self.update_state(irblock, state)
+            if (
+                    state_orig is not None and
+                    state.equivalence_classes == state_orig.equivalence_classes and
+                    state.undefined == state_orig.undefined
+            ):
+                continue
+
+            if state_orig:
+                state.undefined.update(state_orig.undefined)
+            states[loc_key] = state
+            # Propagate to sons
+            for successor in ircfg.successors(loc_key):
+                todo.append(successor)
+
+        # Update blocks
+        todo = set(loc_key for loc_key in irblocks)
+        modified = False
+        while todo:
+            loc_key = todo.pop()
+            irblock = irblocks.get(loc_key)
+            if irblock is None:
+                continue
+
+            state = self.merge_prev_states(ircfg, states, loc_key)
+            new_irblock, modified_irblock = self.update_state(irblock, state)
+            modified |= modified_irblock
+            irblocks[new_irblock.loc_key] = new_irblock
+
+        return modified
diff --git a/miasm/analysis/gdbserver.py b/miasm/analysis/gdbserver.py
index ac58cdad..b45e9f35 100644
--- a/miasm/analysis/gdbserver.py
+++ b/miasm/analysis/gdbserver.py
@@ -251,8 +251,8 @@ class GdbServer(object):
                     else:
                         raise NotImplementedError("Unknown Except")
                 elif isinstance(ret, debugging.DebugBreakpointTerminate):
-                    # Connexion should close, but keep it running as a TRAP
-                    # The connexion will be close on instance destruction
+                    # Connection should close, but keep it running as a TRAP
+                    # The connection will be closed on instance destruction
                     print(ret)
                     self.status = b"S05"
                     self.send_queue.append(b"S05")
diff --git a/miasm/analysis/sandbox.py b/miasm/analysis/sandbox.py
index 3040a1a8..1449d7be 100644
--- a/miasm/analysis/sandbox.py
+++ b/miasm/analysis/sandbox.py
@@ -213,6 +213,7 @@ class OS_Win(OS):
                 fstream.read(),
                 load_hdr=self.options.load_hdr,
                 name=self.fname,
+                winobjs=win_api_x86_32.winobjs,
                 **kwargs
             )
             self.name2module[fname_basename] = self.pe
@@ -227,6 +228,7 @@ class OS_Win(OS):
                     self.ALL_IMP_DLL,
                     libs,
                     self.modules_path,
+                    winobjs=win_api_x86_32.winobjs,
                     **kwargs
                 )
             )
@@ -242,6 +244,7 @@ class OS_Win(OS):
                 self.name2module,
                 libs,
                 self.modules_path,
+                winobjs=win_api_x86_32.winobjs,
                 **kwargs
             )
 
diff --git a/miasm/analysis/simplifier.py b/miasm/analysis/simplifier.py
index 8e9005a8..43623476 100644
--- a/miasm/analysis/simplifier.py
+++ b/miasm/analysis/simplifier.py
@@ -11,8 +11,8 @@ from miasm.expression.simplifications import expr_simp
 from miasm.ir.ir import AssignBlock, IRBlock
 from miasm.analysis.data_flow import DeadRemoval, \
     merge_blocks, remove_empty_assignblks, \
-    PropagateExprIntThroughExprId, PropagateThroughExprId, \
-    PropagateThroughExprMem, del_unused_edges
+    del_unused_edges, \
+    PropagateExpressions, DelDummyPhi
 
 
 log = logging.getLogger("simplifier")
@@ -129,9 +129,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
     and apply out-of-ssa. Final passes of IRcfgSimplifier are applied
 
     This class apply following pass until reaching a fix point:
-    - do_propagate_int
-    - do_propagate_mem
-    - do_propagate_expr
+    - do_propagate_expressions
     - do_dead_simp_ssa
     """
 
@@ -143,9 +141,9 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
 
         self.ssa_forbidden_regs = self.get_forbidden_regs()
 
-        self.propag_int = PropagateExprIntThroughExprId()
-        self.propag_expr = PropagateThroughExprId()
-        self.propag_mem = PropagateThroughExprMem()
+        self.propag_expressions = PropagateExpressions()
+        self.del_dummy_phi = DelDummyPhi()
+
         self.deadremoval = DeadRemoval(self.ir_arch, self.all_ssa_vars)
 
     def get_forbidden_regs(self):
@@ -167,9 +165,8 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
         """
         self.passes = [
             self.simplify_ssa,
-            self.do_propagate_int,
-            self.do_propagate_mem,
-            self.do_propagate_expr,
+            self.do_propagate_expressions,
+            self.do_del_dummy_phi,
             self.do_dead_simp_ssa,
             self.do_remove_empty_assignblks,
             self.do_del_unused_edges,
@@ -245,13 +242,21 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
         modified = self.propag_mem.propagate(ssa, head)
         return modified
 
-    @fix_point
-    def do_propagate_expr(self, ssa, head):
+    def do_propagate_expressions(self, ssa, head):
         """
         Expressions propagation through ExprId in the @ssa graph
         @head: Location instance of the graph head
         """
-        modified = self.propag_expr.propagate(ssa, head)
+        modified = self.propag_expressions.propagate(ssa, head)
+        return modified
+
+    @fix_point
+    def do_del_dummy_phi(self, ssa, head):
+        """
+        Del dummy phi
+        @head: Location instance of the graph head
+        """
+        modified = self.del_dummy_phi.del_dummy_phi(ssa, head)
         return modified
 
     @fix_point
diff --git a/miasm/arch/arm/arch.py b/miasm/arch/arm/arch.py
index fbccd329..2b4476f0 100644
--- a/miasm/arch/arm/arch.py
+++ b/miasm/arch/arm/arch.py
@@ -1660,6 +1660,33 @@ bs_mr_name = bs_name(l=1, name=mr_name)
 bs_addi = bs(l=1, fname="add_imm")
 bs_rw = bs_mod_name(l=1, fname='rw', mn_mod=['W', ''])
 
+class armt_barrier_option(reg_noarg, arm_arg):
+    reg_info = barrier_info
+    parser = reg_info.parser
+
+    def decode(self, v):
+        v = v & self.lmask
+        if v not in self.reg_info.dct_expr:
+            return False
+        self.expr = self.reg_info.dct_expr[v]
+        return True
+
+    def encode(self):
+        if not self.expr in self.reg_info.dct_expr_inv:
+            log.debug("cannot encode reg %r", self.expr)
+            return False
+        self.value = self.reg_info.dct_expr_inv[self.expr]
+        if self.value > self.lmask:
+            log.debug("cannot encode field value %x %x",
+                      self.value, self.lmask)
+            return False
+        return True
+
+    def check_fbits(self, v):
+        return v & self.fmask == self.fbits
+
+barrier_option = bs(l=4, cls=(armt_barrier_option,))
+
 armop("mul", [bs('000000'), bs('0'), scc, rd, bs('0000'), rs, bs('1001'), rm], [rd, rm, rs])
 armop("umull", [bs('000010'), bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs])
 armop("umlal", [bs('000010'), bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs])
@@ -1709,7 +1736,8 @@ armop("rev16", [bs('01101011'), bs('1111'), rd, bs('1111'), bs('1011'), rm])
 
 armop("pld", [bs8(0xF5), bs_addi, bs_rw, bs('01'), mem_rn_imm, bs('1111'), imm12_off])
 
-armop("isb", [bs8(0xF5), bs8(0x7F), bs8(0xF0), bs8(0x6F)])
+armop("dsb", [bs('111101010111'), bs('1111'), bs('1111'), bs('0000'), bs('0100'), barrier_option])
+armop("isb", [bs('111101010111'), bs('1111'), bs('1111'), bs('0000'), bs('0110'), barrier_option])
 armop("nop", [bs8(0xE3), bs8(0x20), bs8(0xF0), bs8(0)])
 
 class arm_widthm1(arm_imm, m_arg):
@@ -2326,7 +2354,6 @@ class arm_sp(arm_reg):
     reg_info = gpregs_sp
     parser = reg_info.parser
 
-
 off5 = bs(l=5, cls=(arm_imm,), fname="off")
 off3 = bs(l=3, cls=(arm_imm,), fname="off")
 off8 = bs(l=8, cls=(arm_imm,), fname="off")
@@ -3230,33 +3257,6 @@ bs_deref_reg_reg = bs(l=4, cls=(armt_deref_reg_reg,))
 bs_deref_reg_reg_lsl_1 = bs(l=4, cls=(armt_deref_reg_reg_lsl_1,))
 
 
-class armt_barrier_option(reg_noarg, arm_arg):
-    reg_info = barrier_info
-    parser = reg_info.parser
-
-    def decode(self, v):
-        v = v & self.lmask
-        if v not in self.reg_info.dct_expr:
-            return False
-        self.expr = self.reg_info.dct_expr[v]
-        return True
-
-    def encode(self):
-        if not self.expr in self.reg_info.dct_expr_inv:
-            log.debug("cannot encode reg %r", self.expr)
-            return False
-        self.value = self.reg_info.dct_expr_inv[self.expr]
-        if self.value > self.lmask:
-            log.debug("cannot encode field value %x %x",
-                      self.value, self.lmask)
-            return False
-        return True
-
-    def check_fbits(self, v):
-        return v & self.fmask == self.fbits
-
-barrier_option = bs(l=4, cls=(armt_barrier_option,))
-
 armtop("adc", [bs('11110'),  imm12_1, bs('0'), bs('1010'), scc, rn_nosppc, bs('0'), imm12_3, rd_nosppc, imm12_8])
 armtop("adc", [bs('11101'),  bs('01'), bs('1010'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh])
 armtop("bl", [bs('11110'), tsign, timm10H, bs('11'), tj1, bs('1'), tj2, timm11L])
diff --git a/miasm/arch/arm/regs.py b/miasm/arch/arm/regs.py
index 63caada3..2b24b0d5 100644
--- a/miasm/arch/arm/regs.py
+++ b/miasm/arch/arm/regs.py
@@ -2,7 +2,7 @@
 
 from builtins import range
 from miasm.expression.expression import *
-
+from miasm.core.cpu import gen_reg, gen_regs
 
 # GP
 
@@ -111,4 +111,67 @@ regs_init = {}
 for i, r in enumerate(all_regs_ids):
     regs_init[r] = all_regs_ids_init[i]
 
+coproc_reg_str = [
+                    "MIDR", "CTR", "TCMTR", "TLBTR", "MIDR", "MPIDR", "REVIDR",
+                    "ID_PFR0", "ID_PFR1", "ID_DFR0", "ID_AFR0", "ID_MMFR0", "ID_MMFR1", "ID_MMFR2", "ID_MMFR3",
+                    "ID_ISAR0", "ID_ISAR1", "ID_ISAR2", "ID_ISAR3", "ID_ISAR4", "ID_ISAR5",
+                    "CCSIDR", "CLIDR", "AIDR",
+                    "CSSELR",
+                    "VPIDR", "VMPIDR",
+                    "SCTLR", "ACTLR", "CPACR",
+                    "SCR", "SDER", "NSACR",
+                    "HSCTLR", "HACTLR",
+                    "HCR", "HDCR", "HCPTR", "HSTR", "HACR",
+                    "TTBR0", "TTBR1", "TTBCR",
+                    "HTCR", "VTCR",
+                    "DACR",
+                    "DFSR", "IFSR",
+                    "ADFSR", "AIFSR",
+                    "HADFSR", "HAIFSR",
+                    "HSR",
+                    "DFAR", "IFAR",
+                    "HDFAR", "HIFAR", "HPFAR",
+                    "ICIALLUIS", "BPIALLIS",
+                    "PAR",
+                    "ICIALLU", "ICIMVAU", "CP15ISB", "BPIALL", "BPIMVA",
+                    "DCIMVAC", "DCISW",
+                    "ATS1CPR", "ATS1CPW", "ATS1CUR", "ATS1CUW", "ATS12NSOPR", "ATS12NSOPW", "ATS12NSOUR", "ATS12NSOUW",
+                    "DCCMVAC", "DCCSW", "CP15DSB", "CP15DMB",
+                    "DCCMVAU",
+                    "DCCIMVAC", "DCCISW",
+                    "ATS1HR", "ATS1HW",
+                    "TLBIALLIS", "TLBIMVAIS", "TLBIASIDIS", "TLBIMVAAIS",
+                    "ITLBIALL", "ITLBIMVA", "ITLBIASID",
+                    "DTLBIALL", "DTLBIMVA", "DTLBIASID",
+                    "TLBIALL", "TLBIMVA", "TLBIASID", "TLBIMVAA",
+                    "TLBIALLHIS", "TLBIMVAHIS", "TLBIALLNSNHIS",
+                    "TLBIALLH", "TLBIMVAH", "TLBIALLNSNH",
+                    "PMCR", "PMCNTENSET", "PMCNTENCLR", "PMOVSR", "PMSWINC", "PMSELR", "PMCEID0", "PMCEID1",
+                    "PMCCNTR", "PMXEVTYPER", "PMXEVCNTR",
+                    "PMUSERENR", "PMINTENSET", "PMINTENCLR", "PMOVSSET", 
+                    "PRRR", "NMRR",
+                    "AMAIR0", "AMAIR1",
+                    "HMAIR0", "HMAIR1",
+                    "HAMAIR0", "HAMAIR1",
+                    "VBAR", "MVBAR",
+                    "ISR",
+                    "HVBAR",
+                    "FCSEIDR", "CONTEXTIDR", "TPIDRURW", "TPIDRURO", "TPIDRPRW",
+                    "HTPIDR",
+                    "CNTFRQ",
+                    "CNTKCTL",
+                    "CNTP_TVAL", "CNTP_CTL",
+                    "CNTV_TVAL", "CNTV_CTL",
+                    "CNTHCTL",
+                    "CNTHP_TVAL", "CNTHP_CTL"
+                ]
+coproc_reg_expr, coproc_reg_init, coproc_reg_info = gen_regs(coproc_reg_str, globals(), 32)
+
+all_regs_ids = all_regs_ids + coproc_reg_expr
+all_regs_ids_byname.update(dict([(x.name, x) for x in coproc_reg_expr]))
+all_regs_ids_init = all_regs_ids_init + coproc_reg_init
+
+for i, r in enumerate(coproc_reg_expr):
+    regs_init[r] = coproc_reg_init[i]
+
 regs_flt_expr = []
diff --git a/miasm/arch/arm/sem.py b/miasm/arch/arm/sem.py
index 569a9a23..027c3a6a 100644
--- a/miasm/arch/arm/sem.py
+++ b/miasm/arch/arm/sem.py
@@ -8,6 +8,219 @@ from miasm.arch.arm.regs import *
 
 from miasm.jitter.csts import EXCEPT_DIV_BY_ZERO, EXCEPT_INT_XX
 
+coproc_reg_dict = {  # key: (coproc, CRn, opc1, CRm, opc2), as built by mrc/mcr
+        ("p15", "c0", 0, "c0", 0): MIDR,
+        ("p15", "c0", 0, "c0", 1): CTR,
+        ("p15", "c0", 0, "c0", 2): TCMTR,
+        ("p15", "c0", 0, "c0", 3): TLBTR,
+        ("p15", "c0", 0, "c0", 4): MIDR,
+        ("p15", "c0", 0, "c0", 5): MPIDR,
+        ("p15", "c0", 0, "c0", 6): REVIDR,
+        ("p15", "c0", 0, "c0", 7): MIDR,
+
+        ("p15", "c0", 0, "c1", 0): ID_PFR0,
+        ("p15", "c0", 0, "c1", 1): ID_PFR1,
+        ("p15", "c0", 0, "c1", 2): ID_DFR0,
+        ("p15", "c0", 0, "c1", 3): ID_AFR0,
+        ("p15", "c0", 0, "c1", 4): ID_MMFR0,
+        ("p15", "c0", 0, "c1", 5): ID_MMFR1,
+        ("p15", "c0", 0, "c1", 6): ID_MMFR2,
+        ("p15", "c0", 0, "c1", 7): ID_MMFR3,
+
+        ("p15", "c0", 0, "c2", 0): ID_ISAR0,
+        ("p15", "c0", 0, "c2", 1): ID_ISAR1,
+        ("p15", "c0", 0, "c2", 2): ID_ISAR2,
+        ("p15", "c0", 0, "c2", 3): ID_ISAR3,
+        ("p15", "c0", 0, "c2", 4): ID_ISAR4,
+        ("p15", "c0", 0, "c2", 5): ID_ISAR5,
+
+        ("p15", "c0", 1, "c0", 0): CCSIDR,
+        ("p15", "c0", 1, "c0", 1): CLIDR,
+        ("p15", "c0", 1, "c0", 7): AIDR,
+
+        ("p15", "c0", 2, "c0", 0): CSSELR,
+
+        ("p15", "c0", 4, "c0", 0): VPIDR,
+        ("p15", "c0", 4, "c0", 5): VMPIDR,
+
+        ("p15", "c1", 0, "c0", 0): SCTLR,
+        ("p15", "c1", 0, "c0", 1): ACTLR,
+        ("p15", "c1", 0, "c0", 2): CPACR,
+
+        ("p15", "c1", 0, "c1", 0): SCR,
+        ("p15", "c1", 0, "c1", 1): SDER,
+        ("p15", "c1", 0, "c1", 2): NSACR,
+
+        ("p15", "c1", 4, "c0", 0): HSCTLR,
+        ("p15", "c1", 4, "c0", 1): HACTLR,
+
+        ("p15", "c1", 4, "c1", 0): HCR,
+        ("p15", "c1", 4, "c1", 1): HDCR,
+        ("p15", "c1", 4, "c1", 2): HCPTR,
+        ("p15", "c1", 4, "c1", 3): HSTR,
+        ("p15", "c1", 4, "c1", 7): HACR,
+
+        # TODO: TTBR0/TTBR1 64-bit
+        ("p15", "c2", 0, "c0", 0): TTBR0,
+        ("p15", "c2", 0, "c0", 1): TTBR1,
+        ("p15", "c2", 0, "c0", 2): TTBCR,
+
+        ("p15", "c2", 4, "c0", 2): HTCR,
+
+        ("p15", "c2", 4, "c1", 2): VTCR,
+
+        # TODO: HTTBR, VTTBR
+
+        ("p15", "c3", 0, "c0", 0): DACR,
+
+        ("p15", "c5", 0, "c0", 0): DFSR,
+        ("p15", "c5", 0, "c0", 1): IFSR,
+
+        ("p15", "c5", 0, "c1", 0): ADFSR,
+        ("p15", "c5", 0, "c1", 1): AIFSR,
+
+        ("p15", "c5", 4, "c1", 0): HADFSR,
+        ("p15", "c5", 4, "c1", 1): HAIFSR,
+
+        ("p15", "c5", 4, "c2", 0): HSR,
+
+        ("p15", "c6", 0, "c0", 0): DFAR,   # CRm is c0 (c6, c0, 0)
+        ("p15", "c6", 0, "c0", 2): IFAR,   # CRm is c0 (c6, c0, 2)
+
+        ("p15", "c6", 4, "c0", 0): HDFAR,
+        ("p15", "c6", 4, "c0", 2): HIFAR,
+        ("p15", "c6", 4, "c0", 4): HPFAR,
+
+        ("p15", "c7", 0, "c1", 0): ICIALLUIS,
+        ("p15", "c7", 0, "c1", 6): BPIALLIS,
+
+        ("p15", "c7", 0, "c4", 0): PAR,
+
+        # TODO: PAR 64-bit
+
+        ("p15", "c7", 0, "c5", 0): ICIALLU,
+        ("p15", "c7", 0, "c5", 1): ICIMVAU,
+        ("p15", "c7", 0, "c5", 4): CP15ISB,
+        ("p15", "c7", 0, "c5", 6): BPIALL,
+        ("p15", "c7", 0, "c5", 7): BPIMVA,
+
+        ("p15", "c7", 0, "c6", 1): DCIMVAC,
+        ("p15", "c7", 0, "c6", 2): DCISW,
+
+        ("p15", "c7", 0, "c8", 0): ATS1CPR,
+        ("p15", "c7", 0, "c8", 1): ATS1CPW,
+        ("p15", "c7", 0, "c8", 2): ATS1CUR,
+        ("p15", "c7", 0, "c8", 3): ATS1CUW,
+        ("p15", "c7", 0, "c8", 4): ATS12NSOPR,
+        ("p15", "c7", 0, "c8", 5): ATS12NSOPW,
+        ("p15", "c7", 0, "c8", 6): ATS12NSOUR,
+        ("p15", "c7", 0, "c8", 7): ATS12NSOUW,
+
+        ("p15", "c7", 0, "c10", 1): DCCMVAC,
+        ("p15", "c7", 0, "c10", 2): DCCSW,
+        ("p15", "c7", 0, "c10", 4): CP15DSB,
+        ("p15", "c7", 0, "c10", 5): CP15DMB,
+
+        ("p15", "c7", 0, "c11", 1): DCCMVAU,
+
+        ("p15", "c7", 0, "c14", 1): DCCIMVAC,
+        ("p15", "c7", 0, "c14", 2): DCCISW,
+
+        ("p15", "c7", 4, "c8", 0): ATS1HR,
+        ("p15", "c7", 4, "c8", 1): ATS1HW,
+
+        ("p15", "c8", 0, "c3", 0): TLBIALLIS,
+        ("p15", "c8", 0, "c3", 1): TLBIMVAIS,
+        ("p15", "c8", 0, "c3", 2): TLBIASIDIS,
+        ("p15", "c8", 0, "c3", 3): TLBIMVAAIS,
+
+        ("p15", "c8", 0, "c5", 0): ITLBIALL,
+        ("p15", "c8", 0, "c5", 1): ITLBIMVA,
+        ("p15", "c8", 0, "c5", 2): ITLBIASID,
+
+        ("p15", "c8", 0, "c6", 0): DTLBIALL,
+        ("p15", "c8", 0, "c6", 1): DTLBIMVA,
+        ("p15", "c8", 0, "c6", 2): DTLBIASID,
+
+        ("p15", "c8", 0, "c7", 0): TLBIALL,
+        ("p15", "c8", 0, "c7", 1): TLBIMVA,
+        ("p15", "c8", 0, "c7", 2): TLBIASID,
+        ("p15", "c8", 0, "c7", 3): TLBIMVAA,
+
+        ("p15", "c8", 4, "c3", 0): TLBIALLHIS,
+        ("p15", "c8", 4, "c3", 1): TLBIMVAHIS,
+        ("p15", "c8", 4, "c3", 4): TLBIALLNSNHIS,
+
+        ("p15", "c8", 4, "c7", 0): TLBIALLH,
+        ("p15", "c8", 4, "c7", 1): TLBIMVAH,
+        ("p15", "c8", 4, "c7", 4): TLBIALLNSNH,   # opc2 is 4, like TLBIALLNSNHIS
+
+        ("p15", "c9", 0, "c12", 0): PMCR,
+        ("p15", "c9", 0, "c12", 1): PMCNTENSET,
+        ("p15", "c9", 0, "c12", 2): PMCNTENCLR,
+        ("p15", "c9", 0, "c12", 3): PMOVSR,
+        ("p15", "c9", 0, "c12", 4): PMSWINC,
+        ("p15", "c9", 0, "c12", 5): PMSELR,
+        ("p15", "c9", 0, "c12", 6): PMCEID0,
+        ("p15", "c9", 0, "c12", 7): PMCEID1,
+
+        ("p15", "c9", 0, "c13", 0): PMCCNTR,
+        ("p15", "c9", 0, "c13", 1): PMXEVTYPER,
+        ("p15", "c9", 0, "c13", 2): PMXEVCNTR,
+
+        ("p15", "c9", 0, "c14", 0): PMUSERENR,
+        ("p15", "c9", 0, "c14", 1): PMINTENSET,
+        ("p15", "c9", 0, "c14", 2): PMINTENCLR,
+        ("p15", "c9", 0, "c14", 3): PMOVSSET,
+
+        ("p15", "c10", 0, "c2", 0): PRRR,   # ALIAS MAIR0
+        ("p15", "c10", 0, "c2", 1): NMRR,   # ALIAS MAIR1
+
+        ("p15", "c10", 0, "c3", 0): AMAIR0,
+        ("p15", "c10", 0, "c3", 1): AMAIR1,
+
+        ("p15", "c10", 4, "c2", 0): HMAIR0,
+        ("p15", "c10", 4, "c2", 1): HMAIR1,
+
+        ("p15", "c10", 4, "c3", 0): HAMAIR0,
+        ("p15", "c10", 4, "c3", 1): HAMAIR1,
+
+        ("p15", "c12", 0, "c0", 0): VBAR,
+        ("p15", "c12", 0, "c0", 1): MVBAR,
+
+        ("p15", "c12", 0, "c1", 0): ISR,
+
+        ("p15", "c12", 4, "c0", 0): HVBAR,
+
+        ("p15", "c13", 0, "c0", 0): FCSEIDR,
+        ("p15", "c13", 0, "c0", 1): CONTEXTIDR,
+        ("p15", "c13", 0, "c0", 2): TPIDRURW,
+        ("p15", "c13", 0, "c0", 3): TPIDRURO,
+        ("p15", "c13", 0, "c0", 4): TPIDRPRW,
+
+        ("p15", "c13", 4, "c0", 2): HTPIDR,
+
+        ("p15", "c14", 0, "c0", 0): CNTFRQ,
+        # TODO: CNTPCT 64-bit
+
+        ("p15", "c14", 0, "c1", 0): CNTKCTL,
+
+        ("p15", "c14", 0, "c2", 0): CNTP_TVAL,
+        ("p15", "c14", 0, "c2", 1): CNTP_CTL,
+
+        ("p15", "c14", 0, "c3", 0): CNTV_TVAL,
+        ("p15", "c14", 0, "c3", 1): CNTV_CTL,
+
+        # TODO: CNTVCT, CNTP_CVAL, CNTV_CVAL, CNTVOFF 64-bit
+
+        ("p15", "c14", 4, "c1", 0): CNTHCTL,
+
+        ("p15", "c14", 4, "c2", 0): CNTHP_TVAL,
+        ("p15", "c14", 4, "c2", 1): CNTHP_CTL   # opc2 is 1; a duplicate key would drop CNTHP_TVAL
+
+        # TODO: CNTHP_CVAL 64-bit
+        }
+
 # liris.cnrs.fr/~mmrissa/lib/exe/fetch.php?media=armv7-a-r-manual.pdf
 EXCEPT_SOFT_BP = (1 << 1)
 
@@ -1319,6 +1532,10 @@ def dsb(ir, instr, a):
     e = []
     return e, []
 
+def isb(ir, instr, a):
+    # XXX TODO
+    e = []
+    return e, []
 
 def cpsie(ir, instr, a):
     # XXX TODO
@@ -1376,6 +1593,25 @@ def pkhtb(ir, instr, arg1, arg2, arg3):
     )
     return e, []
 
+def mrc(ir, insr, arg1, arg2, arg3, arg4, arg5, arg6):
+    """Copy the coprocessor register selected by the operands into @arg3"""
+    sreg = (str(arg1), str(arg4), int(arg2), str(arg5), int(arg6))
+    if sreg not in coproc_reg_dict:
+        # Register not present in coproc_reg_dict: refuse to lift
+        raise NotImplementedError("Unknown coprocessor register: %s %s %d %s %d" % sreg)
+
+    # Key order follows coproc_reg_dict: arg1, arg4, arg2, arg5, arg6
+    return [ExprAssign(arg3, coproc_reg_dict[sreg])], []
+
+def mcr(ir, insr, arg1, arg2, arg3, arg4, arg5, arg6):
+    """Copy @arg3 into the coprocessor register selected by the operands"""
+    sreg = (str(arg1), str(arg4), int(arg2), str(arg5), int(arg6))
+    if sreg not in coproc_reg_dict:
+        # Register not present in coproc_reg_dict: refuse to lift
+        raise NotImplementedError("Unknown coprocessor register: %s %s %d %s %d" % sreg)
+
+    # Key order follows coproc_reg_dict: arg1, arg4, arg2, arg5, arg6
+    return [ExprAssign(coproc_reg_dict[sreg], arg3)], []
 
 COND_EQ = 0
 COND_NE = 1
@@ -1516,6 +1752,9 @@ mnemo_condm0 = {'add': add,
                 'sdiv': sdiv,
                 'udiv': udiv,
 
+                'mrc': mrc,
+                'mcr': mcr,
+
                 'mul': mul,
                 'umull': umull,
                 'umlal': umlal,
@@ -1629,6 +1868,7 @@ mnemo_nocond = {'lsr': lsr,
                 'tbh': tbh,
                 'nop': nop,
                 'dsb': dsb,
+                'isb': isb,
                 'cpsie': cpsie,
                 'cpsid': cpsid,
                 'wfe': wfe,
@@ -1774,7 +2014,7 @@ class ir_arml(IntermediateRepresentation):
             index += 1
             instr = block.lines[index]
 
-            # Add conditionnal jump to current irblock
+            # Add conditional jump to current irblock
             loc_do = self.loc_db.add_location()
             loc_next = self.get_next_loc_key(instr)
 
diff --git a/miasm/arch/mep/sem.py b/miasm/arch/mep/sem.py
index c1585d35..df484ab5 100644
--- a/miasm/arch/mep/sem.py
+++ b/miasm/arch/mep/sem.py
@@ -334,7 +334,7 @@ if False:
 def sltu3(r0, rn, rm_or_imm5):
     """SLTU3 - Set on less than (unsigned)."""
 
-    # if (Rn<Rm) R0<-1 else R0<-0 (Unigned)
+    # if (Rn<Rm) R0<-1 else R0<-0 (Unsigned)
     # if (Rn<ZeroExt(imm5)) R0<-1 else R0<-0(Unsigned)
     r0 = i32(1) if compute_u_inf(rn, rm_or_imm5) else i32(0)
 
diff --git a/miasm/arch/mips32/arch.py b/miasm/arch/mips32/arch.py
index f1e52585..0398be37 100644
--- a/miasm/arch/mips32/arch.py
+++ b/miasm/arch/mips32/arch.py
@@ -47,8 +47,8 @@ class additional_info(object):
         self.except_on_instr = False
 
 br_0 = ['B', 'J', 'JR', 'BAL', 'JAL', 'JALR']
-br_1 = ['BGEZ', 'BLTZ', 'BGTZ', 'BLEZ', 'BC1T', 'BC1F']
-br_2 = ['BEQ', 'BEQL', 'BNE']
+br_1 = ['BGEZ', 'BLTZ', 'BGTZ', 'BGTZL', 'BLEZ', 'BLEZL', 'BC1T', 'BC1TL', 'BC1F', 'BC1FL']
+br_2 = ['BEQ', 'BEQL', 'BNE', 'BNEL']
 
 
 class instruction_mips32(cpu.instruction):
@@ -321,6 +321,26 @@ class mips32_s16imm_noarg(mips32_imm):
         self.value = v
         return True
 
+
+class mips32_s09imm_noarg(mips32_imm):
+    def decode(self, v):
+        v = v & self.lmask
+        v = cpu.sign_ext(v, 9, 32)
+        self.expr = ExprInt(v, 32)
+        return True
+
+    def encode(self):
+        if not isinstance(self.expr, ExprInt):
+            return False
+        v = int(self.expr)
+        if v & 0x80000000:
+            nv = v & ((1 << 9) - 1)
+            assert( v == cpu.sign_ext(nv, 9, 32))
+            v = nv
+        self.value = v
+        return True
+
+
 class mips32_soff_noarg(mips32_imm):
     def decode(self, v):
         v = v & self.lmask
@@ -346,6 +366,9 @@ class mips32_soff_noarg(mips32_imm):
 class mips32_s16imm(mips32_s16imm_noarg, mips32_arg):
     pass
 
+class mips32_s09imm(mips32_s09imm_noarg, mips32_arg):
+    pass
+
 class mips32_soff(mips32_soff_noarg, mips32_arg):
     pass
 
@@ -471,16 +494,22 @@ fd = cpu.bs(l=5, cls=(mips32_fltpreg,))
 
 s16imm = cpu.bs(l=16, cls=(mips32_s16imm,))
 u16imm = cpu.bs(l=16, cls=(mips32_u16imm,))
+s09imm = cpu.bs(l=9, cls=(mips32_s09imm,))
 sa = cpu.bs(l=5, cls=(mips32_u16imm,))
 base = cpu.bs(l=5, cls=(mips32_dreg_imm,))
 soff = cpu.bs(l=16, cls=(mips32_soff,))
+oper = cpu.bs(l=5, cls=(mips32_u16imm,))
 
 cpr0 = cpu.bs(l=5, cls=(mips32_imm,), fname="cpr0")
 cpr =  cpu.bs(l=3, cls=(mips32_cpr,))
 
+stype = cpu.bs(l=5, cls=(mips32_u16imm,))
+hint_pref = cpu.bs(l=5, cls=(mips32_u16imm,))
 
 s16imm_noarg = cpu.bs(l=16, cls=(mips32_s16imm_noarg,), fname="imm",
                   order=-1)
+s09imm_noarg = cpu.bs(l=9, cls=(mips32_s09imm_noarg,), fname="imm",
+                  order=-1)
 
 hint = cpu.bs(l=5, default_val="00000")
 fcc = cpu.bs(l=3, cls=(mips32_fccreg,))
@@ -669,13 +698,18 @@ mips32op("mfhi",    [cpu.bs('000000'), cpu.bs('0000000000'), rd,
 mips32op("b",       [cpu.bs('000100'), cpu.bs('00000'), cpu.bs('00000'), soff],
          alias = True)
 mips32op("bne",     [cpu.bs('000101'), rs, rt, soff])
+mips32op("bnel",    [cpu.bs('010101'), rs, rt, soff])
+
 mips32op("beq",     [cpu.bs('000100'), rs, rt, soff])
+mips32op("beql",    [cpu.bs('010100'), rs, rt, soff])
 
 mips32op("blez",    [cpu.bs('000110'), rs, cpu.bs('00000'), soff])
+mips32op("blezl",   [cpu.bs('010110'), rs, cpu.bs('00000'), soff])
 
 mips32op("bcc",     [cpu.bs('000001'), rs, bs_bcc, soff])
 
 mips32op("bgtz",    [cpu.bs('000111'), rs, cpu.bs('00000'), soff])
+mips32op("bgtzl",   [cpu.bs('010111'), rs, cpu.bs('00000'), soff])
 mips32op("bal",     [cpu.bs('000001'), cpu.bs('00000'), cpu.bs('10001'), soff],
          alias = True)
 
@@ -698,7 +732,6 @@ mips32op("mtc0",    [cpu.bs('010000'), cpu.bs('00100'), rt, cpr0,
                      cpu.bs('00000000'), cpr])
 mips32op("mtc1",    [cpu.bs('010001'), cpu.bs('00100'), rt, fs,
                      cpu.bs('00000000000')])
-
 # XXXX TODO CFC1
 mips32op("cfc1",    [cpu.bs('010001'), cpu.bs('00010'), rt, fs,
                      cpu.bs('00000000000')])
@@ -716,8 +749,12 @@ mips32op("c",       [cpu.bs('010001'), bs_fmt, ft, fs, fcc, cpu.bs('0'),
 
 mips32op("bc1t",    [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('0'),
                      cpu.bs('1'), soff])
+mips32op("bc1tl",    [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('1'),
+                     cpu.bs('1'), soff])
 mips32op("bc1f",    [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('0'),
                      cpu.bs('0'), soff])
+mips32op("bc1fl",    [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('1'),
+                     cpu.bs('0'), soff])
 
 mips32op("swc1",    [cpu.bs('111001'), base, ft, s16imm_noarg], [ft, base])
 
@@ -754,3 +791,33 @@ mips32op("tlbwi",   [cpu.bs('010000'), cpu.bs('1'), cpu.bs('0'*19),
 
 mips32op("teq",     [cpu.bs('000000'), rs, rt, bs_code, cpu.bs('110100')],
          [rs, rt])
+mips32op("tne",     [cpu.bs('000000'), rs, rt, bs_code, cpu.bs('110110')],         
+         [rs, rt])
+
+mips32op("clz",     [cpu.bs('011100'), rs, rt, rd, cpu.bs('00000'), cpu.bs('100000')],
+        [rd, rs])
+mips32op("clz",     [cpu.bs('000000'), rs, cpu.bs('00000'), rd, cpu.bs('00001010000')],
+        [rd, rs])
+
+mips32op("ll",      [cpu.bs('110000'), base, rt, s16imm_noarg], [rt, base])
+mips32op("ll",      [cpu.bs('011111'), base, rt, s09imm_noarg, cpu.bs('0110110')], [rt, base])
+
+mips32op("sc",      [cpu.bs('111000'), base, rt, s16imm_noarg], [rt, base])
+mips32op("sc",      [cpu.bs('011111'), base, rt, s09imm_noarg, cpu.bs('0'), cpu.bs('100110')], [rt, base])
+
+mips32op("sync",    [cpu.bs('000000000000000000000'), stype, cpu.bs('001111')], [stype])
+
+mips32op("pref",    [cpu.bs('110011'), base, hint_pref, s16imm_noarg], [hint_pref, base])
+mips32op("pref",    [cpu.bs('011111'), base, hint_pref, s09imm_noarg, cpu.bs('0110101')], [hint_pref, base])
+
+mips32op("tlbwr",   [cpu.bs('01000010000000000000000000000110')], [])
+mips32op("tlbr",    [cpu.bs('01000010000000000000000000000001')], [])
+
+mips32op("cache",   [cpu.bs('101111'), base, oper, s16imm_noarg], [oper, base])
+mips32op("cache",   [cpu.bs('011111'), base, oper, s09imm_noarg, cpu.bs('0100101')], [oper, base])
+
+mips32op("eret",    [cpu.bs('01000010000000000000000000011000')], [])
+
+mips32op("mtlo",    [cpu.bs('000000'), rs, cpu.bs('000000000000000'), cpu.bs('010011')], [rs])
+mips32op("mthi",    [cpu.bs('000000'), rs, cpu.bs('000000000000000'), cpu.bs('010001')], [rs])
+
diff --git a/miasm/arch/mips32/regs.py b/miasm/arch/mips32/regs.py
index 1513e989..967b7458 100644
--- a/miasm/arch/mips32/regs.py
+++ b/miasm/arch/mips32/regs.py
@@ -40,15 +40,43 @@ R_HI_init = ExprId('R_HI_init', 32)
 
 cpr0_str = ["CPR0_%d"%x for x in range(0x100)]
 cpr0_str[0] = "INDEX"
+cpr0_str[8] = "RANDOM"
 cpr0_str[16] = "ENTRYLO0"
 cpr0_str[24] = "ENTRYLO1"
+cpr0_str[32] = "CONTEXT"
+cpr0_str[33] = "CONTEXTCONFIG"
 cpr0_str[40] = "PAGEMASK"
+cpr0_str[41] = "PAGEGRAIN"
+cpr0_str[42] = "SEGCTL0"
+cpr0_str[43] = "SEGCTL1"
+cpr0_str[44] = "SEGCTL2"
+cpr0_str[45] = "PWBASE"
+cpr0_str[46] = "PWFIELD"
+cpr0_str[47] = "PWSIZE"
+cpr0_str[48] = "WIRED"
+cpr0_str[54] = "PWCTL"
+cpr0_str[64] = "BADVADDR"
+cpr0_str[65] = "BADINSTR"
+cpr0_str[66] = "BADINSTRP"
 cpr0_str[72] = "COUNT"
 cpr0_str[80] = "ENTRYHI"
 cpr0_str[104] = "CAUSE"
 cpr0_str[112] = "EPC"
+cpr0_str[120] = "PRID"
+cpr0_str[121] = "EBASE"
 cpr0_str[128] = "CONFIG"
+cpr0_str[129] = "CONFIG1"
+cpr0_str[130] = "CONFIG2"
+cpr0_str[131] = "CONFIG3"
+cpr0_str[132] = "CONFIG4"
+cpr0_str[133] = "CONFIG5"
 cpr0_str[152] = "WATCHHI"
+cpr0_str[250] = "KSCRATCH"
+cpr0_str[251] = "KSCRATCH1"
+cpr0_str[252] = "KSCRATCH2"
+cpr0_str[253] = "KSCRATCH3"
+cpr0_str[254] = "KSCRATCH4"
+cpr0_str[255] = "KSCRATCH5"
 
 regs_cpr0_expr, regs_cpr0_init, regs_cpr0_info = gen_regs(cpr0_str, globals())
 
diff --git a/miasm/arch/mips32/sem.py b/miasm/arch/mips32/sem.py
index 5fc491a7..23684a8d 100644
--- a/miasm/arch/mips32/sem.py
+++ b/miasm/arch/mips32/sem.py
@@ -67,6 +67,12 @@ def lbu(arg1, arg2):
     arg1 = mem8[arg2.ptr].zeroExtend(32)
 
 @sbuild.parse
+def lh(arg1, arg2):
+    """A halfword is loaded (sign extended) into a register @arg1 from the
+    specified address @arg2."""
+    arg1 = mem16[arg2.ptr].signExtend(32)
+
+@sbuild.parse
 def lhu(arg1, arg2):
     """A word is loaded (unsigned extended) into a register @arg1 from the
     specified address @arg2."""
@@ -78,6 +84,11 @@ def lb(arg1, arg2):
     arg1 = mem8[arg2.ptr].signExtend(32)
 
 @sbuild.parse
+def ll(arg1, arg2):
+    "To load a word from memory for an atomic read-modify-write"
+    arg1 = arg2
+
+@sbuild.parse
 def beq(arg1, arg2, arg3):
     "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq"
     dst = arg3 if ExprOp(m2_expr.TOK_EQUAL, arg1, arg2) else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size)
@@ -85,6 +96,13 @@ def beq(arg1, arg2, arg3):
     ir.IRDst = dst
 
 @sbuild.parse
+def beql(arg1, arg2, arg3):
+    "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq"
+    dst = arg3 if ExprOp(m2_expr.TOK_EQUAL, arg1, arg2) else ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size)
+    PC = dst
+    ir.IRDst = dst
+
+@sbuild.parse
 def bgez(arg1, arg2):
     """Branches on @arg2 if the quantities of register @arg1 is greater than or
     equal to zero"""
@@ -93,6 +111,14 @@ def bgez(arg1, arg2):
     ir.IRDst = dst
 
 @sbuild.parse
+def bgezl(arg1, arg2):
+    """Branches on @arg2 if the quantities of register @arg1 is greater than or
+    equal to zero"""
+    dst = ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size) if ExprOp(m2_expr.TOK_INF_SIGNED, arg1, ExprInt(0, arg1.size)) else arg2
+    PC = dst
+    ir.IRDst = dst
+
+@sbuild.parse
 def bne(arg1, arg2, arg3):
     """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT
     equal"""
@@ -101,6 +127,14 @@ def bne(arg1, arg2, arg3):
     ir.IRDst = dst
 
 @sbuild.parse
+def bnel(arg1, arg2, arg3):
+    """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT
+    equal"""
+    dst = ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size) if ExprOp(m2_expr.TOK_EQUAL, arg1, arg2) else arg3
+    PC = dst
+    ir.IRDst = dst
+
+@sbuild.parse
 def lui(arg1, arg2):
     """The immediate value @arg2 is shifted left 16 bits and stored in the
     register @arg1. The lower 16 bits are zeroes."""
@@ -111,6 +145,14 @@ def nop():
     """Do nothing"""
 
 @sbuild.parse
+def sync(arg1):
+    """Synchronize Shared Memory"""
+
+@sbuild.parse
+def pref(arg1, arg2):
+    """To move data between memory and cache"""
+
+@sbuild.parse
 def j(arg1):
     """Jump to an address @arg1"""
     PC = arg1
@@ -248,6 +290,13 @@ def bltz(arg1, arg2):
     ir.IRDst = dst_o
 
 @sbuild.parse
+def bltzl(arg1, arg2):
+    """Branches on @arg2 if the register @arg1 is less than zero"""
+    dst_o = arg2 if ExprOp(m2_expr.TOK_INF_SIGNED, arg1, ExprInt(0, arg1.size)) else ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size)
+    PC = dst_o
+    ir.IRDst = dst_o
+
+@sbuild.parse
 def blez(arg1, arg2):
     """Branches on @arg2 if the register @arg1 is less than or equal to zero"""
     cond = ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size))
@@ -256,6 +305,14 @@ def blez(arg1, arg2):
     ir.IRDst = dst_o
 
 @sbuild.parse
+def blezl(arg1, arg2):
+    """Branches on @arg2 if the register @arg1 is less than or equal to zero"""
+    cond = ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size))
+    dst_o = arg2 if cond else ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size)
+    PC = dst_o
+    ir.IRDst = dst_o
+
+@sbuild.parse
 def bgtz(arg1, arg2):
     """Branches on @arg2 if the register @arg1 is greater than zero"""
     cond =  ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size))
@@ -264,6 +321,14 @@ def bgtz(arg1, arg2):
     ir.IRDst = dst_o
 
 @sbuild.parse
+def bgtzl(arg1, arg2):
+    """Branches on @arg2 if the register @arg1 is greater than zero"""
+    cond =  ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size))
+    dst_o = ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size) if cond else arg2
+    PC = dst_o
+    ir.IRDst = dst_o
+
+@sbuild.parse
 def wsbh(arg1, arg2):
     arg1 = ExprCompose(arg2[8:16], arg2[0:8], arg2[24:32], arg2[16:24])
 
@@ -320,6 +385,14 @@ def tlbwi():
 def tlbp():
     "TODO XXX"
 
+@sbuild.parse
+def tlbwr():
+    "TODO XXX"
+
+@sbuild.parse
+def tlbr():
+    "TODO XXX"
+
 def ins(ir, instr, a, b, c, d):
     e = []
     pos = int(c)
@@ -327,12 +400,12 @@ def ins(ir, instr, a, b, c, d):
 
     my_slices = []
     if pos != 0:
-        my_slices.append((a[:pos], 0, pos))
+        my_slices.append(a[:pos])
     if l != 0:
-        my_slices.append((b[:l], pos, pos+l))
+        my_slices.append(b[:l])
     if pos + l != 32:
-        my_slices.append((a[pos+l:], pos+l, 32))
-    r = m2_expr.ExprCompose(my_slices)
+        my_slices.append(a[pos+l:])
+    r = m2_expr.ExprCompose(*my_slices)
     e.append(m2_expr.ExprAssign(a, r))
     return e, []
 
@@ -364,12 +437,24 @@ def bc1t(arg1, arg2):
     ir.IRDst = dst_o
 
 @sbuild.parse
+def bc1tl(arg1, arg2):
+    dst_o = arg2 if arg1 else ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size)
+    PC = dst_o
+    ir.IRDst = dst_o
+
+@sbuild.parse
 def bc1f(arg1, arg2):
     dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1 else arg2
     PC = dst_o
     ir.IRDst = dst_o
 
 @sbuild.parse
+def bc1fl(arg1, arg2):
+    dst_o = ExprLoc(ir.get_next_delay_loc_key(instr), ir.IRDst.size) if arg1 else arg2
+    PC = dst_o
+    ir.IRDst = dst_o
+
+@sbuild.parse
 def cvt_d_w(arg1, arg2):
     # TODO XXX
     arg1 = 'flt_d_w'(arg2)
@@ -424,6 +509,23 @@ def ei(arg1):
 def ehb(arg1):
     "NOP"
 
+@sbuild.parse
+def sc(arg1, arg2):
+    arg2 = arg1  # write the register @arg1 to the memory operand @arg2
+    arg1 = ExprInt(0x1, 32)  # result is always 1; no failing store is modelled
+
+@sbuild.parse
+def mthi(arg1):
+    R_HI = arg1
+
+@sbuild.parse
+def mtlo(arg1):
+    R_LO = arg1  # NOTE(review): LO is assumed to be exported as R_LO (cf. R_HI) - confirm
+
+def clz(ir, instr, rd, rs):
+    # Operands arrive in instruction order (rd, rs): rd = leading zeros of rs
+    e = [m2_expr.ExprAssign(rd, m2_expr.ExprOp('cntleadzeros', rs))]
+    return e, []
 
 def teq(ir, instr, arg1, arg2):
     e = []
@@ -436,7 +538,7 @@ def teq(ir, instr, arg1, arg2):
     do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(
         EXCEPT_DIV_BY_ZERO, exception_flags.size)))
     do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr))
-    blk_except = IRBlock(loc_except.index, [AssignBlock(do_except, instr)])
+    blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)])
 
     cond = arg1 - arg2
 
@@ -447,6 +549,28 @@ def teq(ir, instr, arg1, arg2):
 
     return e, [blk_except]
 
+def tne(ir, instr, arg1, arg2):
+    """Raise an exception if @arg1 and @arg2 are not equal"""
+
+    loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size)
+    loc_next = ir.get_next_loc_key(instr)
+    loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size)
+
+    do_except = []
+    do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(
+        EXCEPT_DIV_BY_ZERO, exception_flags.size)))
+    do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr))
+    blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)])
+
+    # Non-zero when the operands differ: that is the trapping case
+    cond = arg1 ^ arg2
+
+    e = []
+    e.append(m2_expr.ExprAssign(ir.IRDst,
+                             m2_expr.ExprCond(cond, loc_except_expr, loc_next_expr)))
+
+    return e, [blk_except]
+
 
 mnemo_func = sbuild.functions
 mnemo_func.update({
@@ -473,8 +597,10 @@ mnemo_func.update({
         'subu': l_sub,
         'xor': l_xor,
         'xori': l_xor,
+        'clz': clz,
         'teq': teq,
-})
+        'tne': tne
+        })
 
 def get_mnemo_expr(ir, instr, *args):
     instr, extra_ir = mnemo_func[instr.name.lower()](ir, instr, *args)
@@ -511,6 +637,9 @@ class ir_mips32l(IntermediateRepresentation):
     def get_next_break_loc_key(self, instr):
         return self.loc_db.get_or_create_offset_location(instr.offset  + 8)
 
+    def get_next_delay_loc_key(self, instr):
+        return self.loc_db.get_or_create_offset_location(instr.offset + 16)
+
 class ir_mips32b(ir_mips32l):
     def __init__(self, loc_db=None):
         self.addrsize = 32
diff --git a/miasm/arch/ppc/arch.py b/miasm/arch/ppc/arch.py
index 8cd0181c..2b951027 100644
--- a/miasm/arch/ppc/arch.py
+++ b/miasm/arch/ppc/arch.py
@@ -443,6 +443,13 @@ class ppc_u04imm(ppc_u16imm):
 class ppc_u02imm_noarg(imm_noarg):
     pass
 
+class ppc_float(ppc_reg):
+    reg_info = floatregs
+    parser = reg_info.parser
+
+class ppc_vex(ppc_reg):
+    reg_info = vexregs
+    parser = reg_info.parser
 
 def ppc_bo_bi_to_mnemo(bo, bi, prefer_taken=True, default_taken=True):
     bo2mnemo = { 0: 'DNZF', 2: 'DZF', 4: 'F', 8: 'DNZT',
@@ -566,6 +573,16 @@ dregimm = bs(l=16, cls=(ppc_deref32,))
 
 rc_mod = bs_mod_name(l=1, mn_mod=['', '.'], fname='rc')
 
+frd = bs(l=5, cls=(ppc_float,))
+frb = bs(l=5, cls=(ppc_float,))
+frs = bs(l=5, cls=(ppc_float,))
+fm = bs(l=8, cls=(ppc_u08imm,))
+
+va = bs(l=5, cls=(ppc_vex,))
+vb = bs(l=5, cls=(ppc_vex,))
+vd = bs(l=5, cls=(ppc_vex,))
+rb_noarg = bs(l=5, cls=(ppc_gpreg_noarg,), fname="rb")
+
 arith1_name = {"MULLI": 0b000111, "SUBFIC": 0b001000, "ADDIC": 0b001100,
                "ADDIC.": 0b001101 }
 
@@ -636,6 +653,17 @@ dcb_name = {"DCBST": 0b00001, "DCBF": 0b00010,
             "DCBI": 0b01110, "DCBA": 0b10111,
             "ICBI": 0b11110, "DCBZ": 0b11111 }
 
+
+load1_name_float = {"LFS": 0b110000, "LFD": 0b110010 }
+load1_name_float_u = {"LFSU": 0b110001, "LFDU": 0b110011 }
+store1_name_float = {"STFS": 0b110100, "STFD": 0b110110 }
+store1_name_float_u = {"STFSU": 0b110101, "STFDU": 0b110111 }
+
+load1_name_vex = {"LVEBX": 0b0000000111, "LVEHX": 0b0000100111,
+                  "LVEWX": 0b0001000111, "LVSL": 0b0000000110,
+                  "LVSR": 0b0000100110, "LVX": 0b0001100111,
+                  "LVXL": 0b0101100111 }
+
 class bs_mod_name_prio4(bs_mod_name):
     prio = 4
 
@@ -762,3 +790,15 @@ ppcop("SRAWI", [bs('011111'), rs, ra, sh, bs('1100111000'), rc_mod],
       [ra, rs, sh])
 
 ppcop("EIEIO", [bs('011111'), bs('000000000000000'), bs('11010101100')])
+
+ppcop("load1f", [bs_name(l=6, name=load1_name_float), frd, ra_noarg, dregimm])
+ppcop("load1fu", [bs_name(l=6, name=load1_name_float_u), frd, ra_noarg, dregimm])
+ppcop("store1f", [bs_name(l=6, name=store1_name_float), frd, ra_noarg, dregimm])
+ppcop("store1fu", [bs_name(l=6, name=store1_name_float_u), frd, ra_noarg, dregimm])
+ppcop("MTFSF", [bs('111111'), bs('0'), fm, bs('0'), frb, bs('10110001110')])
+ppcop("MTFSF.", [bs('111111'), bs('0'), fm, bs('0'), frb, bs('10110001111')])
+ppcop("MFFS", [bs('111111'), frd, bs('00000000001001000111'), bs('0')])
+ppcop("MFFS.", [bs('111111'), frd, bs('00000000001001000111'), bs('1')])
+
+ppcop("load1vex", [bs('011111'), vd, ra, rb, bs_name(l=10, name=load1_name_vex), bs('0')])
+ppcop("mtvscr", [bs('0001000000000000'), vb, bs('11001000100')])
diff --git a/miasm/arch/ppc/regs.py b/miasm/arch/ppc/regs.py
index 4b710045..00781d6a 100644
--- a/miasm/arch/ppc/regs.py
+++ b/miasm/arch/ppc/regs.py
@@ -35,7 +35,7 @@ xerbcreg_expr, xerbcreg_init, xerbcreg = gen_regs(xerbcreg_str,
                                                   globals(), 7)
 
 
-otherregs_str = ["PC", "CTR", "LR" ]
+otherregs_str = ["PC", "CTR", "LR", "FPSCR", "VRSAVE", "VSCR" ]
 otherregs_expr, otherregs_init, otherregs = gen_regs(otherregs_str,
                                                      globals(), 32)
 
@@ -55,10 +55,18 @@ mmuregs_str = (["SR%d" % i for i in range(16)] +
 mmuregs_expr, mmuregs_init, mmuregs = gen_regs(mmuregs_str,
                                                globals(), 32)
 
+floatregs_str = (["FPR%d" % i for i in range(32)])
+floatregs_expr, floatregs_init, floatregs = gen_regs(floatregs_str,
+                                                     globals(), 64)
+
+vexregs_str = (["VR%d" % i for i in range(32)])
+vexregs_expr, vexregs_init, vexregs = gen_regs(vexregs_str,
+                                              globals(), 128)
+
 regs_flt_expr = []
 
 all_regs_ids = (gpregs_expr + crfbitregs_expr + xerbitregs_expr +
-                xerbcreg_expr + otherregs_expr + superregs_expr + mmuregs_expr +
+                xerbcreg_expr + otherregs_expr + superregs_expr + mmuregs_expr + floatregs_expr + vexregs_expr +
                 [ exception_flags, spr_access, reserve, reserve_address ])
 all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids])
 all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids]
diff --git a/miasm/arch/ppc/sem.py b/miasm/arch/ppc/sem.py
index cc0be7d7..7ca7e3e1 100644
--- a/miasm/arch/ppc/sem.py
+++ b/miasm/arch/ppc/sem.py
@@ -25,6 +25,20 @@ sr_dict = {
     12: SR12, 13: SR13, 14: SR14, 15: SR15
 }
 
+float_dict = {
+    0: FPR0, 1: FPR1, 2: FPR2, 3: FPR3, 4: FPR4, 5: FPR5, 6: FPR6, 7: FPR7, 8: FPR8,
+    9: FPR9, 10: FPR10, 11: FPR11, 12: FPR12, 13: FPR13, 14: FPR14, 15: FPR15, 16: FPR16,
+    17: FPR17, 18: FPR18, 19: FPR19, 20: FPR20, 21: FPR21, 22: FPR22, 23: FPR23, 24: FPR24,
+    25: FPR25, 26: FPR26, 27: FPR27, 28: FPR28, 29: FPR29, 30: FPR30, 31: FPR31
+}
+
+vex_dict = {
+    0: VR0, 1: VR1, 2: VR2, 3: VR3, 4: VR4, 5: VR5, 6: VR6, 7: VR7, 8: VR8,
+    9: VR9, 10: VR10, 11: VR11, 12: VR12, 13: VR13, 14: VR14, 15: VR15, 16: VR16,
+    17: VR17, 18: VR18, 19: VR19, 20: VR20, 21: VR21, 22: VR22, 23: VR23, 24: VR24,
+    25: VR25, 26: VR26, 27: VR27, 28: VR28, 29: VR29, 30: VR30, 31: VR31,
+}
+
 crf_dict = dict((ExprId("CR%d" % i, 4),
                  dict( (bit, ExprId("CR%d_%s" % (i, bit), 1))
                        for bit in ['LT', 'GT', 'EQ', 'SO' ] ))
@@ -34,6 +48,8 @@ ctx = {
     'crf_dict': crf_dict,
     'spr_dict': spr_dict,
     'sr_dict': sr_dict,
+    'float_dict': float_dict,
+    'vex_dict': vex_dict,
     'expr': expr,
 }
 
@@ -232,8 +248,8 @@ def mn_do_exts(ir, instr, ra, rs):
 
 def byte_swap(expr):
     nbytes = expr.size // 8
-    bytes = [ expr[i*8:i*8+8] for i in range(nbytes - 1, -1, -1) ]
-    return ExprCompose(bytes)
+    lbytes = [ expr[i*8:i*8+8] for i in range(nbytes - 1, -1, -1) ]
+    return ExprCompose(*lbytes)
 
 def mn_do_load(ir, instr, arg1, arg2, arg3=None):
     assert instr.name[0] == 'L'
@@ -244,6 +260,12 @@ def mn_do_load(ir, instr, arg1, arg2, arg3=None):
         return mn_do_lmw(ir, instr, arg1, arg2)
     elif instr.name[1] == 'S':
         raise RuntimeError("LSWI, and LSWX need implementing")
+    elif instr.name[1] == 'F':
+        print("Warning, instruction %s implemented as NOP" % instr)
+        return  [], []
+    elif instr.name[1] == 'V':
+        print("Warning, instruction %s implemented as NOP" % instr)
+        return [], []
 
     size = {'B': 8, 'H': 16, 'W': 32}[instr.name[1]]
 
@@ -298,7 +320,7 @@ def mn_do_load(ir, instr, arg1, arg2, arg3=None):
 
 def mn_do_lmw(ir, instr, rd, src):
     ret = []
-    address = int(src)
+    address = src.ptr
     ri = int(rd.name[1:],10)
     i = 0
     while ri <= 31:
@@ -505,7 +527,7 @@ def mn_do_rfi(ir, instr):
     ret = [ ExprAssign(MSR, (MSR &
                           ~ExprInt(0b1111111101110011, 32) |
                           ExprCompose(SRR1[0:2], ExprInt(0, 2),
-                                      SRR1[4:7], ExprInt(0, 1), 
+                                      SRR1[4:7], ExprInt(0, 1),
                                       SRR1[8:16], ExprInt(0, 16)))),
             ExprAssign(PC, dest),
             ExprAssign(ir.IRDst, dest) ]
@@ -580,7 +602,7 @@ def mn_do_srw(ir, instr, ra, rs, rb):
 
 def mn_do_stmw(ir, instr, rs, dest):
     ret = []
-    address = int(dest)
+    address = dest.ptr
     ri = int(rs.name[1:],10)
     i = 0
     while ri <= 31:
@@ -599,6 +621,9 @@ def mn_do_store(ir, instr, arg1, arg2, arg3=None):
 
     if instr.name[2] == 'S':
         raise RuntimeError("STSWI, and STSWX need implementing")
+    elif instr.name[2] == 'F':
+        print("Warning, instruction %s implemented as NOP" % instr)
+        return  [], []
 
     size = {'B': 8, 'H': 16, 'W': 32}[instr.name[2]]
 
@@ -650,8 +675,8 @@ def mn_do_store(ir, instr, arg1, arg2, arg3=None):
         ret.append(ExprAssign(ir.IRDst, loc_next))
         dont = flags + [ ExprAssign(CR0_EQ, ExprInt(0,1)),
                          ExprAssign(ir.IRDst, loc_next) ]
-        additional_ir = [ IRBlock(loc_do, [ AssignBlock(ret) ]),
-                          IRBlock(loc_dont, [ AssignBlock(dont) ]) ]
+        additional_ir = [ IRBlock(loc_do.loc_key, [ AssignBlock(ret) ]),
+                          IRBlock(loc_dont.loc_key, [ AssignBlock(dont) ]) ]
         ret = [ ExprAssign(reserve, ExprInt(0, 1)),
                 ExprAssign(ir.IRDst, ExprCond(reserve, loc_do, loc_dont)) ]
 
@@ -834,16 +859,21 @@ sem_dir = {
     'MCRF': mn_do_mcrf,
     'MCRXR': mn_do_mcrxr,
     'MFCR': mn_do_mfcr,
+    'MFFS': mn_do_nop_warn,
+    'MFFS.': mn_do_nop_warn,
     'MFMSR': mn_mfmsr,
     'MFSPR': mn_mfspr,
     'MFSR': mn_mfsr,
     'MFSRIN': mn_do_nop_warn,
-    'MFTB': mn_mfmsr,
+    'MTFSF': mn_do_nop_warn,
+    'MTFSF.': mn_do_nop_warn,
+    'MFTB': mn_mfspr,
     'MTCRF': mn_mtcrf,
     'MTMSR': mn_mtmsr,
     'MTSPR': mn_mtspr,
     'MTSR': mn_mtsr,
     'MTSRIN': mn_do_nop_warn,
+    'MTVSCR': mn_do_nop_warn,
     'NAND': mn_do_nand,
     'NAND.': mn_do_nand,
     'NOR': mn_do_nor,
@@ -928,9 +958,9 @@ class ir_ppc32b(IntermediateRepresentation):
             instr_ir, extra_ir = mn_do_store(self, instr, *args)
         elif instr.name[0:4] == 'SUBF':
             if instr.name[0:5] == 'SUBFZ':
-                last_arg = ExprInt(0)
+                last_arg = ExprInt(0, 32)
             elif instr.name[0:5] == 'SUBFM':
-                last_arg = ExprInt(0xFFFFFFFF)
+                last_arg = ExprInt(0xFFFFFFFF, 32)
             else:
                 last_arg = args[2]
             instr_ir, extra_ir = mn_do_sub(self, instr, args[0], args[1],
diff --git a/miasm/arch/x86/arch.py b/miasm/arch/x86/arch.py
index 725f3126..127dded4 100644
--- a/miasm/arch/x86/arch.py
+++ b/miasm/arch/x86/arch.py
@@ -481,7 +481,7 @@ class instruction_x86(instruction):
         expr = self.args[0]
         if not expr.is_int():
             return
-        addr = expr.arg + int(self.offset)
+        addr = (int(expr) + int(self.offset)) & int(expr.mask)
         loc_key = loc_db.get_or_create_offset_location(addr)
         self.args[0] = ExprLoc(loc_key, expr.size)
 
@@ -547,7 +547,7 @@ class instruction_x86(instruction):
 
     def __str__(self):
         return self.to_string()
-      
+
     def to_string(self, loc_db=None):
         o = super(instruction_x86, self).to_string(loc_db)
         if self.additional_info.g1.value & 1:
@@ -1913,7 +1913,10 @@ def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0):
         if parent.disp.value is None:
             return None
         o.append(ExprInt(int(parent.disp.expr), admode))
-    expr = ExprOp('+', *o)
+    if len(o) == 1:
+        expr = o[0]
+    else:
+        expr = ExprOp('+', *o)
     if w8 == 0:
         opmode = 8
     elif sx == 1:
diff --git a/miasm/arch/x86/sem.py b/miasm/arch/x86/sem.py
index b0fcd054..86a933a0 100644
--- a/miasm/arch/x86/sem.py
+++ b/miasm/arch/x86/sem.py
@@ -1162,7 +1162,9 @@ def setalc(_, instr):
 def bswap(_, instr, dst):
     e = []
     if dst.size == 16:
-        result = m2_expr.ExprCompose(dst[8:16], dst[:8])
+        # BSWAP referencing a 16-bit register is undefined
+        # Seems to return 0 actually
+        result = m2_expr.ExprInt(0, 16)
     elif dst.size == 32:
         result = m2_expr.ExprCompose(
             dst[24:32], dst[16:24], dst[8:16], dst[:8])
diff --git a/miasm/core/asmblock.py b/miasm/core/asmblock.py
index 93ad6b13..5ac1312f 100644
--- a/miasm/core/asmblock.py
+++ b/miasm/core/asmblock.py
@@ -42,13 +42,6 @@ class AsmRaw(object):
         return str(self)
 
 
-class asm_raw(AsmRaw):
-
-    def __init__(self, raw=b""):
-        warnings.warn('DEPRECATION WARNING: use "AsmRaw" instead of "asm_raw"')
-        super(asm_label, self).__init__(raw)
-
-
 class AsmConstraint(object):
     c_to = "c_to"
     c_next = "c_next"
@@ -60,16 +53,6 @@ class AsmConstraint(object):
         self.loc_key = loc_key
         self.c_t = c_t
 
-    def get_label(self):
-        warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"')
-        return self.loc_key
-
-    def set_label(self, loc_key):
-        warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"')
-        self.loc_key = loc_key
-
-    label = property(get_label, set_label)
-
     def to_string(self, loc_db=None):
         if loc_db is None:
             return "%s:%s" % (self.c_t, self.loc_key)
@@ -83,13 +66,6 @@ class AsmConstraint(object):
         return self.to_string()
 
 
-class asm_constraint(AsmConstraint):
-
-    def __init__(self, loc_key, c_t=AsmConstraint.c_to):
-        warnings.warn('DEPRECATION WARNING: use "AsmConstraint" instead of "asm_constraint"')
-        super(asm_constraint, self).__init__(loc_key, c_t)
-
-
 class AsmConstraintNext(AsmConstraint):
 
     def __init__(self, loc_key):
@@ -99,13 +75,6 @@ class AsmConstraintNext(AsmConstraint):
         )
 
 
-class asm_constraint_next(AsmConstraint):
-
-    def __init__(self, loc_key):
-        warnings.warn('DEPRECATION WARNING: use "AsmConstraintNext" instead of "asm_constraint_next"')
-        super(asm_constraint_next, self).__init__(loc_key)
-
-
 class AsmConstraintTo(AsmConstraint):
 
     def __init__(self, loc_key):
@@ -114,12 +83,6 @@ class AsmConstraintTo(AsmConstraint):
             c_t=AsmConstraint.c_to
         )
 
-class asm_constraint_to(AsmConstraint):
-
-    def __init__(self, loc_key):
-        warnings.warn('DEPRECATION WARNING: use "AsmConstraintTo" instead of "asm_constraint_to"')
-        super(asm_constraint_to, self).__init__(loc_key)
-
 
 class AsmBlock(object):
 
@@ -131,13 +94,7 @@ class AsmBlock(object):
         self._loc_key = loc_key
         self.alignment = alignment
 
-    def get_label(self):
-        warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"')
-        return self.loc_key
-
     loc_key = property(lambda self:self._loc_key)
-    label = property(get_label)
-
 
     def to_string(self, loc_db=None):
         out = []
@@ -290,13 +247,6 @@ class AsmBlock(object):
         )
 
 
-class asm_bloc(object):
-
-    def __init__(self, loc_key, alignment=1):
-        warnings.warn('DEPRECATION WARNING: use "AsmBlock" instead of "asm_bloc"')
-        super(asm_bloc, self).__init__(loc_key, alignment)
-
-
 class AsmBlockBad(AsmBlock):
 
     """Stand for a *bad* ASM block (malformed, unreachable,
@@ -345,26 +295,6 @@ class AsmBlockBad(AsmBlock):
         raise RuntimeError("An AsmBlockBad cannot be split")
 
 
-class asm_block_bad(AsmBlockBad):
-
-    def __init__(self, loc_key=None, alignment=1, errno=-1, *args, **kwargs):
-        warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"')
-        super(asm_block_bad, self).__init__(loc_key, alignment, *args, **kwargs)
-
-class AsmSymbolPool(LocationDB):
-    """[DEPRECATED API] use 'LocationDB' instead"""
-
-    def __init__(self, *args, **kwargs):
-        warnings.warn("Deprecated API, use 'LocationDB' instead")
-        super(AsmSymbolPool, self).__init__(*args, **kwargs)
-
-class asm_symbol_pool(AsmSymbolPool):
-
-    def __init__(self):
-        warnings.warn('DEPRECATION WARNING: use "LocationDB" instead of "asm_symbol_pool"')
-        super(asm_symbol_pool, self).__init__()
-
-
 class AsmCFG(DiGraph):
 
     """Directed graph standing for a ASM Control Flow Graph with:
@@ -399,32 +329,6 @@ class AsmCFG(DiGraph):
         graph = self.__class__(self.loc_db)
         return graph + self
 
-
-    # Compatibility with old list API
-    def append(self, *args, **kwargs):
-        raise DeprecationWarning("AsmCFG is a graph, use add_node")
-
-    def remove(self, *args, **kwargs):
-        raise DeprecationWarning("AsmCFG is a graph, use del_node")
-
-    def __getitem__(self, *args, **kwargs):
-        raise DeprecationWarning("Order of AsmCFG elements is not reliable")
-
-    def __contains__(self, _):
-        """
-        DEPRECATED. Use:
-        - loc_key in AsmCFG.nodes() to test loc_key existence
-        """
-        raise RuntimeError("DEPRECATED")
-
-    def __iter__(self):
-        """
-        DEPRECATED. Use:
-        - AsmCFG.blocks() to iter on blocks
-        - loc_key in AsmCFG.nodes() to test loc_key existence
-        """
-        raise RuntimeError("DEPRECATED")
-
     def __len__(self):
         """Return the number of blocks in AsmCFG"""
         return len(self._nodes)
@@ -609,16 +513,6 @@ class AsmCFG(DiGraph):
         """
         return self._pendings
 
-    def label2block(self, loc_key):
-        """
-        DEPRECATED: Use "loc_key_to_block" instead of "label2block"
-
-        Return the block corresponding to loc_key @loc_key
-        @loc_key: LocKey instance
-        """
-        warnings.warn('DEPRECATION WARNING: use "loc_key_to_block" instead of "label2block"')
-        return self.loc_key_to_block(loc_key)
-
     def rebuild_edges(self):
         """Consider blocks '.bto' and rebuild edges according to them, ie:
         - update constraint type
@@ -1310,13 +1204,6 @@ def asmblock_final(mnemo, asmcfg, blockChains, loc_db, conservative=False):
             assemble_block(mnemo, block, loc_db, conservative)
 
 
-def asmbloc_final(mnemo, blocks, blockChains, loc_db, conservative=False):
-    """Resolve and assemble @blockChains using @loc_db until fixed point is
-    reached"""
-
-    warnings.warn('DEPRECATION WARNING: use "asmblock_final" instead of "asmbloc_final"')
-    asmblock_final(mnemo, blocks, blockChains, loc_db, conservative)
-
 def asm_resolve_final(mnemo, asmcfg, loc_db, dst_interval=None):
     """Resolve and assemble @asmcfg using @loc_db into interval
     @dst_interval"""
@@ -1405,31 +1292,6 @@ class disasmEngine(object):
         # Override options if needed
         self.__dict__.update(kwargs)
 
-    def get_job_done(self):
-        warnings.warn("""DEPRECATION WARNING: "job_done" is not needed anymore, support is dropped.""")
-        return set()
-
-    def set_job_done(self, _):
-        warnings.warn("""DEPRECATION WARNING: "job_done" is not needed anymore, support is dropped.""")
-        return
-
-    def get_dis_bloc_callback(self):
-        warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""")
-        return self.dis_block_callback
-
-    def set_dis_bloc_callback(self, function):
-        warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""")
-        self.dis_block_callback = function
-
-    @property
-    def symbol_pool(self):
-        warnings.warn("""DEPRECATION WARNING: use 'loc_db'""")
-        return self.loc_db
-
-    # Deprecated
-    job_done = property(get_job_done, set_job_done)
-    dis_bloc_callback = property(get_dis_bloc_callback, set_dis_bloc_callback)
-
     def _dis_block(self, offset, job_done=None):
         """Disassemble the block at offset @offset
         @job_done: a set of already disassembled addresses
@@ -1567,9 +1429,7 @@ class disasmEngine(object):
             self.dis_block_callback(mn=self.arch, attrib=self.attrib,
                                     pool_bin=self.bin_stream, cur_bloc=cur_block,
                                     offsets_to_dis=offsets_to_dis,
-                                    loc_db=self.loc_db,
-                                    # Deprecated API
-                                    symbol_pool=self.loc_db)
+                                    loc_db=self.loc_db)
         return cur_block, offsets_to_dis
 
     def dis_block(self, offset):
@@ -1580,14 +1440,6 @@ class disasmEngine(object):
         current_block, _ = self._dis_block(offset)
         return current_block
 
-    def dis_bloc(self, offset):
-        """
-        DEPRECATED function
-        Use dis_block instead of dis_bloc
-        """
-        warnings.warn('DEPRECATION WARNING: use "dis_block" instead of "dis_bloc"')
-        return self.dis_block(offset)
-
     def dis_multiblock(self, offset, blocks=None, job_done=None):
         """Disassemble every block reachable from @offset regarding
         specific disasmEngine conditions
@@ -1624,14 +1476,6 @@ class disasmEngine(object):
                                pool_bin=self.bin_stream)
         return blocks
 
-    def dis_multibloc(self, offset, blocs=None):
-        """
-        DEPRECATED function
-        Use dis_multiblock instead of dis_multibloc
-        """
-        warnings.warn('DEPRECATION WARNING: use "dis_multiblock" instead of "dis_multibloc"')
-        return self.dis_multiblock(offset, blocs)
-
     def dis_instr(self, offset):
         """Disassemble one instruction at offset @offset and return the
         corresponding instruction instance
diff --git a/miasm/core/bin_stream.py b/miasm/core/bin_stream.py
index 727a853d..9224053f 100644
--- a/miasm/core/bin_stream.py
+++ b/miasm/core/bin_stream.py
@@ -137,7 +137,7 @@ class bin_stream(object):
         if endianness == LITTLE_ENDIAN:
             return upck16le(data)
         else:
-            return upck32be(data)
+            return upck16be(data)
 
     def get_u32(self, addr, endianness=None):
         """
diff --git a/miasm/core/cpu.py b/miasm/core/cpu.py
index ec8d95bc..aee22c97 100644
--- a/miasm/core/cpu.py
+++ b/miasm/core/cpu.py
@@ -1589,6 +1589,9 @@ class imm_noarg(object):
 
         assert(m2_expr.is_expr(e))
         self.expr = e
+        if self.expr is None:
+            log.debug('cannot fromstring int %r', text)
+            return None, None
         return start, stop
 
     def decodeval(self, v):
diff --git a/miasm/core/utils.py b/miasm/core/utils.py
index 7667a656..37248c40 100644
--- a/miasm/core/utils.py
+++ b/miasm/core/utils.py
@@ -81,10 +81,16 @@ def printable(string):
 
 
 def force_bytes(value):
-    try:
-        return value.encode()
-    except AttributeError:
+    if isinstance(value, bytes):
         return value
+    if not isinstance(value, str):
+        return value
+    out = []
+    for c in value:
+        c = ord(c)
+        assert c < 0x100
+        out.append(c)
+    return bytes(out)
 
 
 def force_str(value):
diff --git a/miasm/expression/expression.py b/miasm/expression/expression.py
index 18fcb77a..c2bf5b8b 100644
--- a/miasm/expression/expression.py
+++ b/miasm/expression/expression.py
@@ -373,7 +373,7 @@ class ExprVisitorCallbackTopToBottom(ExprVisitorBase):
     """
     Rebuild expression by visiting sub-expressions
     Call @callback on each sub-expression
-    if @¢allback return non None value, replace current node with this value
+    if @callback return non None value, replace current node with this value
     Else, continue visit of sub-expressions
     """
     def __init__(self, callback):
@@ -727,8 +727,8 @@ class Expr(object):
 
     def visit(self, callback):
         """
-        Apply callbak to all sub expression of @self
-        This function keeps a cache to avoid rerunning @callbak on common sub
+        Apply callback to all sub expression of @self
+        This function keeps a cache to avoid rerunning @callback on common sub
         expressions.
 
         @callback: fn(Expr) -> Expr
diff --git a/miasm/expression/simplifications.py b/miasm/expression/simplifications.py
index a56aa0f8..3f54b158 100644
--- a/miasm/expression/simplifications.py
+++ b/miasm/expression/simplifications.py
@@ -63,6 +63,8 @@ class ExpressionSimplifier(ExprVisitorCallbackBottomToTop):
             simplifications_common.simp_test_zeroext_inf,
             simplifications_common.simp_cond_inf_eq_unsigned_zero,
             simplifications_common.simp_compose_and_mask,
+            simplifications_common.simp_bcdadd_cf,
+            simplifications_common.simp_bcdadd,
         ],
 
         m2_expr.ExprSlice: [
diff --git a/miasm/expression/simplifications_common.py b/miasm/expression/simplifications_common.py
index f12ccfcf..932db49a 100644
--- a/miasm/expression/simplifications_common.py
+++ b/miasm/expression/simplifications_common.py
@@ -71,26 +71,28 @@ def simp_cst_propagation(e_s, expr):
                 shifter = int(int2) % int2.size
                 out = (int(int1) << shifter) | (int(int1) >> (int2.size - shifter))
             elif op_name == '/':
+                assert int(int2), "division by 0"
                 out = int(int1) // int(int2)
             elif op_name == '%':
+                assert int(int2), "division by 0"
                 out = int(int1) % int(int2)
             elif op_name == 'sdiv':
-                assert int(int2)
+                assert int(int2), "division by 0"
                 tmp1 = mod_size2int[int1.size](int(int1))
                 tmp2 = mod_size2int[int2.size](int(int2))
                 out = mod_size2uint[int1.size](tmp1 // tmp2)
             elif op_name == 'smod':
-                assert int(int2)
+                assert int(int2), "division by 0"
                 tmp1 = mod_size2int[int1.size](int(int1))
                 tmp2 = mod_size2int[int2.size](int(int2))
                 out = mod_size2uint[int1.size](tmp1 % tmp2)
             elif op_name == 'umod':
-                assert int(int2)
+                assert int(int2), "division by 0"
                 tmp1 = mod_size2uint[int1.size](int(int1))
                 tmp2 = mod_size2uint[int2.size](int(int2))
                 out = mod_size2uint[int1.size](tmp1 % tmp2)
             elif op_name == 'udiv':
-                assert int(int2)
+                assert int(int2), "division by 0"
                 tmp1 = mod_size2uint[int1.size](int(int1))
                 tmp2 = mod_size2uint[int2.size](int(int2))
                 out = mod_size2uint[int1.size](tmp1 // tmp2)
@@ -1081,6 +1083,13 @@ def simp_cmp_bijective_op(expr_simp, expr):
             args_a.remove(value)
             args_b.remove(value)
 
+    # a + b == a + b + c
+    if not args_a:
+        return ExprOp(TOK_EQUAL, ExprOp(op, *args_b), ExprInt(0, args_b[0].size))
+    # a + b + c == a + b
+    if not args_b:
+        return ExprOp(TOK_EQUAL, ExprOp(op, *args_a), ExprInt(0, args_a[0].size))
+    
     arg_a = ExprOp(op, *args_a)
     arg_b = ExprOp(op, *args_b)
     return ExprOp(TOK_EQUAL, arg_a, arg_b)
@@ -1683,3 +1692,54 @@ def simp_compose_and_mask(_, expr):
         else:
             out.append(arg)
     return expr
+
+def simp_bcdadd_cf(_, expr):
+    """bcdadd_cf(const, const) => 1-bit carry out of the BCD addition"""
+    if not(expr.is_op('bcdadd_cf')):
+        return expr
+    arg1 = expr.args[0]
+    arg2 = expr.args[1]
+    if not(arg1.is_int() and arg2.is_int()):
+        return expr
+
+    carry = 0
+    res = 0
+    nib_1, nib_2 = 0, 0
+    for i in range(0,16,4):
+        nib_1 = (arg1.arg >> i) & (0xF)
+        nib_2 = (arg2.arg >> i) & (0xF)
+        
+        j = (carry + nib_1 + nib_2)
+        if (j >= 10):
+            carry = 1
+            j -= 10
+            j &= 0xF
+        else:
+            carry = 0
+    return ExprInt(carry, 1)
+
+def simp_bcdadd(_, expr):
+    """bcdadd(const, const) => BCD sum of the four low digits, as a constant"""
+    if not(expr.is_op('bcdadd')):
+        return expr
+    arg1 = expr.args[0]
+    arg2 = expr.args[1]
+    if not(arg1.is_int() and arg2.is_int()):
+        return expr
+
+    carry = 0
+    res = 0
+    nib_1, nib_2 = 0, 0
+    for i in range(0,16,4):
+        nib_1 = (arg1.arg >> i) & (0xF)
+        nib_2 = (arg2.arg >> i) & (0xF)
+        
+        j = (carry + nib_1 + nib_2)
+        if (j >= 10):
+            carry = 1
+            j -= 10
+            j &= 0xF
+        else:
+            carry = 0
+        res += j << i
+    return ExprInt(res, arg1.size)
diff --git a/miasm/ir/ir.py b/miasm/ir/ir.py
index 9b2e4ba0..3219b5fc 100644
--- a/miasm/ir/ir.py
+++ b/miasm/ir/ir.py
@@ -885,7 +885,7 @@ class IntermediateRepresentation(object):
         return irblock
 
     def is_pc_written(self, block):
-        """Return the first Assignblk of the @blockin which PC is written
+        """Return the first Assignblk of the @block in which PC is written
         @block: IRBlock instance"""
         all_pc = viewvalues(self.arch.pc)
         for assignblk in block:
diff --git a/miasm/ir/symbexec.py b/miasm/ir/symbexec.py
index 65ddde3b..8c6245b8 100644
--- a/miasm/ir/symbexec.py
+++ b/miasm/ir/symbexec.py
@@ -121,7 +121,7 @@ class MemArray(MutableMapping):
     content relatively to an integer offset from *base*.
 
     The value associated to a given offset is a description of the slice of a
-    stored expression. The slice size depends on the configutation of the
+    stored expression. The slice size depends on the configuration of the
     MemArray. For example, for a slice size of 8 bits, the assignment:
     - @32[EAX+0x10] = EBX
 
diff --git a/miasm/ir/translators/z3_ir.py b/miasm/ir/translators/z3_ir.py
index 6b706770..1a36e94e 100644
--- a/miasm/ir/translators/z3_ir.py
+++ b/miasm/ir/translators/z3_ir.py
@@ -15,7 +15,7 @@ log.addHandler(console_handler)
 log.setLevel(logging.WARNING)
 
 class Z3Mem(object):
-    """Memory abstration for TranslatorZ3. Memory elements are only accessed,
+    """Memory abstraction for TranslatorZ3. Memory elements are only accessed,
     never written. To give a concrete value for a given memory cell in a solver,
     add "mem32.get(address, size) == <value>" constraints to your equation.
     The endianness of memory accesses is handled accordingly to the "endianness"
diff --git a/miasm/jitter/arch/JitCore_mips32.h b/miasm/jitter/arch/JitCore_mips32.h
index 74eb35ef..8478fb53 100644
--- a/miasm/jitter/arch/JitCore_mips32.h
+++ b/miasm/jitter/arch/JitCore_mips32.h
@@ -83,7 +83,7 @@ struct vm_cpu {
 	uint32_t CPR0_5;
 	uint32_t CPR0_6;
 	uint32_t CPR0_7;
-	uint32_t CPR0_8;
+	uint32_t RANDOM;
 	uint32_t CPR0_9;
 	uint32_t CPR0_10;
 	uint32_t CPR0_11;
@@ -107,8 +107,8 @@ struct vm_cpu {
 	uint32_t CPR0_29;
 	uint32_t CPR0_30;
 	uint32_t CPR0_31;
-	uint32_t CPR0_32;
-	uint32_t CPR0_33;
+	uint32_t CONTEXT;
+	uint32_t CONTEXTCONFIG;
 	uint32_t CPR0_34;
 	uint32_t CPR0_35;
 	uint32_t CPR0_36;
@@ -116,20 +116,20 @@ struct vm_cpu {
 	uint32_t CPR0_38;
 	uint32_t CPR0_39;
 	uint32_t PAGEMASK;
-	uint32_t CPR0_41;
-	uint32_t CPR0_42;
-	uint32_t CPR0_43;
-	uint32_t CPR0_44;
-	uint32_t CPR0_45;
-	uint32_t CPR0_46;
-	uint32_t CPR0_47;
-	uint32_t CPR0_48;
+	uint32_t PAGEGRAIN;
+	uint32_t SEGCTL0;
+	uint32_t SEGCTL1;
+	uint32_t SEGCTL2;
+	uint32_t PWBASE;
+	uint32_t PWFIELD;
+	uint32_t PWSIZE;
+	uint32_t WIRED;
 	uint32_t CPR0_49;
 	uint32_t CPR0_50;
 	uint32_t CPR0_51;
 	uint32_t CPR0_52;
 	uint32_t CPR0_53;
-	uint32_t CPR0_54;
+	uint32_t PWCTL;
 	uint32_t CPR0_55;
 	uint32_t CPR0_56;
 	uint32_t CPR0_57;
@@ -139,9 +139,9 @@ struct vm_cpu {
 	uint32_t CPR0_61;
 	uint32_t CPR0_62;
 	uint32_t CPR0_63;
-	uint32_t CPR0_64;
-	uint32_t CPR0_65;
-	uint32_t CPR0_66;
+	uint32_t BADVADDR;
+	uint32_t BADINSTR;
+	uint32_t BADINSTRP;
 	uint32_t CPR0_67;
 	uint32_t CPR0_68;
 	uint32_t CPR0_69;
@@ -195,8 +195,8 @@ struct vm_cpu {
 	uint32_t CPR0_117;
 	uint32_t CPR0_118;
 	uint32_t CPR0_119;
-	uint32_t CPR0_120;
-	uint32_t CPR0_121;
+	uint32_t PRID;
+	uint32_t EBASE;
 	uint32_t CPR0_122;
 	uint32_t CPR0_123;
 	uint32_t CPR0_124;
@@ -204,11 +204,11 @@ struct vm_cpu {
 	uint32_t CPR0_126;
 	uint32_t CPR0_127;
 	uint32_t CONFIG;
-	uint32_t CPR0_129;
-	uint32_t CPR0_130;
-	uint32_t CPR0_131;
-	uint32_t CPR0_132;
-	uint32_t CPR0_133;
+	uint32_t CONFIG1;
+	uint32_t CONFIG2;
+	uint32_t CONFIG3;
+	uint32_t CONFIG4;
+	uint32_t CONFIG5;
 	uint32_t CPR0_134;
 	uint32_t CPR0_135;
 	uint32_t CPR0_136;
@@ -325,12 +325,12 @@ struct vm_cpu {
 	uint32_t CPR0_247;
 	uint32_t CPR0_248;
 	uint32_t CPR0_249;
-	uint32_t CPR0_250;
-	uint32_t CPR0_251;
-	uint32_t CPR0_252;
-	uint32_t CPR0_253;
-	uint32_t CPR0_254;
-	uint32_t CPR0_255;
+	uint32_t KSCRATCH0;
+	uint32_t KSCRATCH1;
+	uint32_t KSCRATCH2;
+	uint32_t KSCRATCH3;
+	uint32_t KSCRATCH4;
+	uint32_t KSCRATCH5;
 };
 
 _MIASM_EXPORT void dump_gpregs(struct vm_cpu* vmcpu);
diff --git a/miasm/jitter/arch/JitCore_ppc32_regs.h b/miasm/jitter/arch/JitCore_ppc32_regs.h
index a16d1e95..79191d32 100644
--- a/miasm/jitter/arch/JitCore_ppc32_regs.h
+++ b/miasm/jitter/arch/JitCore_ppc32_regs.h
@@ -121,3 +121,72 @@ JITCORE_PPC_REG_EXPAND(DBAT2L, 32)
 JITCORE_PPC_REG_EXPAND(DBAT3U, 32)
 JITCORE_PPC_REG_EXPAND(DBAT3L, 32)
 JITCORE_PPC_REG_EXPAND(SDR1, 32)
+
+JITCORE_PPC_REG_EXPAND(FPR0, 64)
+JITCORE_PPC_REG_EXPAND(FPR1, 64)
+JITCORE_PPC_REG_EXPAND(FPR2, 64)
+JITCORE_PPC_REG_EXPAND(FPR3, 64)
+JITCORE_PPC_REG_EXPAND(FPR4, 64)
+JITCORE_PPC_REG_EXPAND(FPR5, 64)
+JITCORE_PPC_REG_EXPAND(FPR6, 64)
+JITCORE_PPC_REG_EXPAND(FPR7, 64)
+JITCORE_PPC_REG_EXPAND(FPR8, 64)
+JITCORE_PPC_REG_EXPAND(FPR9, 64)
+JITCORE_PPC_REG_EXPAND(FPR10, 64)
+JITCORE_PPC_REG_EXPAND(FPR11, 64)
+JITCORE_PPC_REG_EXPAND(FPR12, 64)
+JITCORE_PPC_REG_EXPAND(FPR13, 64)
+JITCORE_PPC_REG_EXPAND(FPR14, 64)
+JITCORE_PPC_REG_EXPAND(FPR15, 64)
+JITCORE_PPC_REG_EXPAND(FPR16, 64)
+JITCORE_PPC_REG_EXPAND(FPR17, 64)
+JITCORE_PPC_REG_EXPAND(FPR18, 64)
+JITCORE_PPC_REG_EXPAND(FPR19, 64)
+JITCORE_PPC_REG_EXPAND(FPR20, 64)
+JITCORE_PPC_REG_EXPAND(FPR21, 64)
+JITCORE_PPC_REG_EXPAND(FPR22, 64)
+JITCORE_PPC_REG_EXPAND(FPR23, 64)
+JITCORE_PPC_REG_EXPAND(FPR24, 64)
+JITCORE_PPC_REG_EXPAND(FPR25, 64)
+JITCORE_PPC_REG_EXPAND(FPR26, 64)
+JITCORE_PPC_REG_EXPAND(FPR27, 64)
+JITCORE_PPC_REG_EXPAND(FPR28, 64)
+JITCORE_PPC_REG_EXPAND(FPR29, 64)
+JITCORE_PPC_REG_EXPAND(FPR30, 64)
+JITCORE_PPC_REG_EXPAND(FPR31, 64)
+JITCORE_PPC_REG_EXPAND(FPSCR, 32)
+
+JITCORE_PPC_REG_EXPAND(VR0, 128)
+JITCORE_PPC_REG_EXPAND(VR1, 128)
+JITCORE_PPC_REG_EXPAND(VR2, 128)
+JITCORE_PPC_REG_EXPAND(VR3, 128)
+JITCORE_PPC_REG_EXPAND(VR4, 128)
+JITCORE_PPC_REG_EXPAND(VR5, 128)
+JITCORE_PPC_REG_EXPAND(VR6, 128)
+JITCORE_PPC_REG_EXPAND(VR7, 128)
+JITCORE_PPC_REG_EXPAND(VR8, 128)
+JITCORE_PPC_REG_EXPAND(VR9, 128)
+JITCORE_PPC_REG_EXPAND(VR10, 128)
+JITCORE_PPC_REG_EXPAND(VR11, 128)
+JITCORE_PPC_REG_EXPAND(VR12, 128)
+JITCORE_PPC_REG_EXPAND(VR13, 128)
+JITCORE_PPC_REG_EXPAND(VR14, 128)
+JITCORE_PPC_REG_EXPAND(VR15, 128)
+JITCORE_PPC_REG_EXPAND(VR16, 128)
+JITCORE_PPC_REG_EXPAND(VR17, 128)
+JITCORE_PPC_REG_EXPAND(VR18, 128)
+JITCORE_PPC_REG_EXPAND(VR19, 128)
+JITCORE_PPC_REG_EXPAND(VR20, 128)
+JITCORE_PPC_REG_EXPAND(VR21, 128)
+JITCORE_PPC_REG_EXPAND(VR22, 128)
+JITCORE_PPC_REG_EXPAND(VR23, 128)
+JITCORE_PPC_REG_EXPAND(VR24, 128)
+JITCORE_PPC_REG_EXPAND(VR25, 128)
+JITCORE_PPC_REG_EXPAND(VR26, 128)
+JITCORE_PPC_REG_EXPAND(VR27, 128)
+JITCORE_PPC_REG_EXPAND(VR28, 128)
+JITCORE_PPC_REG_EXPAND(VR29, 128)
+JITCORE_PPC_REG_EXPAND(VR30, 128)
+JITCORE_PPC_REG_EXPAND(VR31, 128)
+JITCORE_PPC_REG_EXPAND(VRSAVE, 32)
+JITCORE_PPC_REG_EXPAND(VSCR, 32)
diff --git a/miasm/jitter/bn.h b/miasm/jitter/bn.h
index 1aa6b432..8c4a8ba1 100644
--- a/miasm/jitter/bn.h
+++ b/miasm/jitter/bn.h
@@ -35,7 +35,7 @@ Code slightly modified to support ast generation calculus style from Expr.
 #include <assert.h>
 
 
-/* This macro defines the word size in bytes of the array that constitues the big-number data structure. */
+/* This macro defines the word size in bytes of the array that constitutes the big-number data structure. */
 #ifndef WORD_SIZE
   #define WORD_SIZE 4
 #endif
diff --git a/miasm/jitter/jitcore.py b/miasm/jitter/jitcore.py
index cff01f9b..cc531cf5 100644
--- a/miasm/jitter/jitcore.py
+++ b/miasm/jitter/jitcore.py
@@ -198,10 +198,7 @@ class JitCore(object):
         """
 
         mem_range = interval()
-
-        for block in blocks:
-            mem_range += interval([(block.ad_min, block.ad_max - 1)])
-
+        mem_range = interval([(block.ad_min, block.ad_max - 1) for block in blocks])
         return mem_range
 
     def __updt_jitcode_mem_range(self, vm):
diff --git a/miasm/jitter/jitcore_cc_base.py b/miasm/jitter/jitcore_cc_base.py
index 995c458b..afb2876c 100644
--- a/miasm/jitter/jitcore_cc_base.py
+++ b/miasm/jitter/jitcore_cc_base.py
@@ -1,5 +1,6 @@
 #-*- coding:utf-8 -*-
 
+import glob
 import os
 import tempfile
 import platform
@@ -76,6 +77,12 @@ class JitCore_Cc_Base(JitCore):
         ext = sysconfig.get_config_var('EXT_SUFFIX')
         if ext is None:
             ext = ".so" if not is_win else ".lib"
+        if is_win:
+            # sysconfig.get_config_var('EXT_SUFFIX') is .pyd on Windows and needs to be forced to .lib
+            # Additionally, Windows-built libraries may have a name like VmMngr.cp38-win_amd64.lib
+            ext_files = glob.glob(os.path.join(lib_dir, "VmMngr.*lib"))
+            if len(ext_files) == 1:
+                ext = os.path.basename(ext_files[0]).replace("VmMngr", "")
 
         libs = [
             os.path.join(lib_dir, "VmMngr" + ext),
diff --git a/miasm/jitter/jitcore_gcc.py b/miasm/jitter/jitcore_gcc.py
index 1520cf38..7ffef69e 100644
--- a/miasm/jitter/jitcore_gcc.py
+++ b/miasm/jitter/jitcore_gcc.py
@@ -1,5 +1,6 @@
 #-*- coding:utf-8 -*-
 
+import sys
 import os
 import tempfile
 import ctypes
@@ -70,7 +71,7 @@ class JitCore_Gcc(JitCore_Cc_Base):
                         get_python_inc(),
                         "..",
                         "libs",
-                        "python27.lib"
+                        "python%d%d.lib" % (sys.version_info.major, sys.version_info.minor)
                     )
                 )
                 cl = [
diff --git a/miasm/jitter/jitcore_llvm.py b/miasm/jitter/jitcore_llvm.py
index 46e93282..df7d5950 100644
--- a/miasm/jitter/jitcore_llvm.py
+++ b/miasm/jitter/jitcore_llvm.py
@@ -1,5 +1,6 @@
 from __future__ import print_function
 import os
+import glob
 import importlib
 import tempfile
 import sysconfig
@@ -9,6 +10,9 @@ import miasm.jitter.jitcore as jitcore
 from miasm.jitter import Jitllvm
 import platform
 
+import llvmlite
+llvmlite.binding.load_library_permanently(Jitllvm.__file__)
+
 is_win = platform.system() == "Windows"
 
 class JitCore_LLVM(jitcore.JitCore):
@@ -56,10 +60,16 @@ class JitCore_LLVM(jitcore.JitCore):
 
         # Get architecture dependent Jitcore library (if any)
         lib_dir = os.path.dirname(os.path.realpath(__file__))
-        lib_dir = os.path.join(lib_dir, 'arch')
         ext = sysconfig.get_config_var('EXT_SUFFIX')
         if ext is None:
             ext = ".so" if not is_win else ".pyd"
+        if is_win:
+            # On Windows, take the suffix from the VmMngr extension module found in lib_dir,
+            # since built modules may have a name like VmMngr.cp38-win_amd64.pyd
+            ext_files = glob.glob(os.path.join(lib_dir, "VmMngr.*pyd"))
+            if len(ext_files) == 1:
+                ext = os.path.basename(ext_files[0]).replace("VmMngr", "")
+        lib_dir = os.path.join(lib_dir, 'arch')
         try:
             jit_lib = os.path.join(
                 lib_dir, self.arch_dependent_libs[self.ir_arch.arch.name] + ext
diff --git a/miasm/jitter/jitload.py b/miasm/jitter/jitload.py
index 68f9c40d..85d5636f 100644
--- a/miasm/jitter/jitload.py
+++ b/miasm/jitter/jitload.py
@@ -393,13 +393,16 @@ class Jitter(object):
         self.pc = pc
         self.run = True
 
-    def continue_run(self, step=False):
+    def continue_run(self, step=False, trace=False):
         """PRE: init_run.
         Continue the run of the current session until iterator returns or run is
         set to False.
         If step is True, run only one time.
+        If trace is True, activate trace log option until execution stops
         Return the iterator value"""
 
+        if trace:
+            self.set_trace_log()
         while self.run:
             try:
                 return next(self.run_iterator)
@@ -409,8 +412,9 @@ class Jitter(object):
             self.run_iterator = self.runiter_once(self.pc)
 
             if step is True:
-                return None
-
+                break
+        if trace:
+            self.set_trace_log(False, False, False)
         return None
 
 
@@ -422,6 +426,18 @@ class Jitter(object):
         self.init_run(addr)
         return self.continue_run()
 
+    def run_until(self, addr, trace=False):
+        """PRE: init_run.
+        Resume the current session through continue_run after installing a
+        breakpoint on @addr whose callback returns False.
+        @trace: forwarded unchanged to continue_run
+        Return the value returned by continue_run"""
+        def one_shot_stop(jitter):
+            # Unregister this callback before answering, so it is not left armed
+            jitter.remove_breakpoints_by_callback(one_shot_stop)
+            return False
+        self.add_breakpoint(addr, one_shot_stop)
+        return self.continue_run(trace=trace)
 
     def init_stack(self):
         self.vm.add_memory_page(
diff --git a/miasm/jitter/loader/pe.py b/miasm/jitter/loader/pe.py
index 961bfd93..73cb1367 100644
--- a/miasm/jitter/loader/pe.py
+++ b/miasm/jitter/loader/pe.py
@@ -171,7 +171,7 @@ def get_export_name_addr_list(e):
     return out
 
 
-def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs):
+def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", winobjs=None, **kargs):
     """Load a PE in memory (@vm) from a data buffer @fdata
     @vm: VmMngr instance
     @fdata: data buffer to parse
@@ -207,6 +207,9 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs):
                 pe.content[:hdr_len] +
                 max(0, (min_len - hdr_len)) * b"\x00"
             )
+
+            if winobjs:
+                winobjs.allocated_pages[pe.NThdr.ImageBase] = (pe.NThdr.ImageBase, len(pe_hdr))
             vm.add_memory_page(
                 pe.NThdr.ImageBase,
                 PAGE_READ | PAGE_WRITE,
@@ -237,8 +240,12 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs):
             attrib = PAGE_READ
             if section.flags & 0x80000000:
                 attrib |= PAGE_WRITE
+
+            section_addr = pe.rva2virt(section.addr)
+            if winobjs:
+                winobjs.allocated_pages[section_addr] = (section_addr, len(data))
             vm.add_memory_page(
-                pe.rva2virt(section.addr),
+                section_addr,
                 attrib,
                 data,
                 "%r: %r" % (name, section.name)
diff --git a/miasm/loader/elf_init.py b/miasm/loader/elf_init.py
index 14f4dc7c..72d08302 100644
--- a/miasm/loader/elf_init.py
+++ b/miasm/loader/elf_init.py
@@ -92,6 +92,8 @@ class WRel32(StructWrapper):
     wrapped._fields.append(("type", "u08"))
 
     def get_sym(self):
+        if isinstance(self.parent.linksection, NullSection):
+            return None
         return self.parent.linksection.symtab[self.cstr.info >> 8].name
 
     def get_type(self):
diff --git a/miasm/loader/new_cstruct.py b/miasm/loader/new_cstruct.py
index ec591aa8..16c947a5 100644
--- a/miasm/loader/new_cstruct.py
+++ b/miasm/loader/new_cstruct.py
@@ -4,6 +4,7 @@ from __future__ import print_function
 import re
 import struct
 
+from miasm.core.utils import force_bytes
 from future.utils import PY3, viewitems, with_metaclass
 
 type2realtype = {}
@@ -213,9 +214,10 @@ class CStruct(with_metaclass(Cstruct_Metaclass, object)):
                 if cpt == None:
                     if value == None:
                         o = struct.calcsize(fmt) * b"\x00"
+                    elif ffmt.endswith('s'):
+                        new_value = force_bytes(value)
+                        o = struct.pack(self.sex + fmt, new_value)
                     else:
-                        if isinstance(value, str):
-                            value = value.encode()
                         o = struct.pack(self.sex + fmt, value)
                 else:
                     o = b""
diff --git a/miasm/loader/pe.py b/miasm/loader/pe.py
index f402e980..2d257906 100644
--- a/miasm/loader/pe.py
+++ b/miasm/loader/pe.py
@@ -267,7 +267,7 @@ class DescName(CStruct):
         return name, off + len(name) + 1
 
     def sets(self, value):
-        return bytes(value) + b"\x00"
+        return force_bytes(value) + b"\x00"
 
 
 class ImportByName(CStruct):
@@ -434,7 +434,7 @@ class DirImport(CStruct):
             #    entry.firstthunk = rva
             # rva+=(len(entry.firstthunks)+1)*self.parent_head._wsize//8 # Rva size
             if entry.originalfirstthunk and entry.firstthunk:
-                if isinstance(entry.originalfirstthunk, struct_array):
+                if isinstance(entry.originalfirstthunks, struct_array):
                     tmp_thunk = entry.originalfirstthunks
                 elif isinstance(entry.firstthunks, struct_array):
                     tmp_thunk = entry.firstthunks
@@ -457,6 +457,11 @@ class DirImport(CStruct):
                     rva += len(imp)
 
     def build_content(self, raw):
+        if self.parent_head._wsize == 32:
+            mask_ptr = 0x80000000
+        elif self.parent_head._wsize == 64:
+            mask_ptr = 0x8000000000000000
+
         dirimp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT]
         of1 = dirimp.rva
         if not of1:  # No Import
@@ -918,7 +923,7 @@ class DirDelay(CStruct):
         return out, off
 
     def sete(self, entries):
-        return "".join(bytes(entry) for entry in entries) + b"\x00" * (4 * 8)  # DelayDesc_e
+        return b"".join(bytes(entry) for entry in entries) + b"\x00" * (4 * 8)  # DelayDesc_e
 
     def __len__(self):
         rva_size = self.parent_head._wsize // 8
@@ -1306,19 +1311,6 @@ class DirRes(CStruct):
 
         out = []
         tmp_off = off
-        for _ in range(nbr):
-            if tmp_off >= ofend:
-                break
-            if tmp_off + length >= len(raw):
-                log.warn('warning bad resource offset')
-                break
-            try:
-                entry, length = ResEntry.unpack_l(raw, tmp_off, self.parent_head)
-            except RuntimeError:
-                log.warn('bad resentry')
-                return None, tmp_off
-            out.append(entry)
-            tmp_off += length
         resdesc.resentries = struct_array(self, raw,
                                           off,
                                           ResEntry,
@@ -1334,7 +1326,7 @@ class DirRes(CStruct):
                     # data dir
                     off = entry.offsettodata
                     if not 0 <= off < len(raw):
-                        log.warn('bad resrouce entry')
+                        log.warn('bad resource entry')
                         continue
                     data = ResDataEntry.unpack(raw,
                                                off,
@@ -1348,7 +1340,7 @@ class DirRes(CStruct):
                     log.warn('warning recusif subdir')
                     continue
                 if not 0 <= off < len(self.parent_head.img_rva):
-                    log.warn('bad resrouce entry')
+                    log.warn('bad resource entry')
                     continue
                 subdir, length = ResDesc_e.unpack_l(raw,
                                                     off,
@@ -1360,7 +1352,7 @@ class DirRes(CStruct):
                                                      ResEntry,
                                                      nbr)
                 except RuntimeError:
-                    log.warn('bad resrouce entry')
+                    log.warn('bad resource entry')
                     continue
 
                 entry.subdir = subdir
@@ -1372,17 +1364,21 @@ class DirRes(CStruct):
             return
         of1 = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva
         raw[self.parent_head.rva2off(of1)] = bytes(self.resdesc)
-        dir_todo = {self.parent_head.NThdr.optentries[
-            DIRECTORY_ENTRY_RESOURCE].rva: self.resdesc}
+        length = len(self.resdesc)
+        dir_todo = {
+            self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva: self.resdesc
+        }
+        of1 = of1 + length
+        raw[self.parent_head.rva2off(of1)] = bytes(self.resdesc.resentries)
         dir_done = {}
         while dir_todo:
             of1, my_dir = dir_todo.popitem()
             dir_done[of1] = my_dir
             raw[self.parent_head.rva2off(of1)] = bytes(my_dir)
             of1 += len(my_dir)
+            raw[self.parent_head.rva2off(of1)] = bytes(my_dir.resentries)
             of_base = of1
             for entry in my_dir.resentries:
-                raw[of_base] = bytes(entry)
                 of_base += len(entry)
                 if entry.name_s:
                     raw[self.parent_head.rva2off(entry.name)] = bytes(entry.name_s)
diff --git a/miasm/os_dep/common.py b/miasm/os_dep/common.py
index 4a92ef2a..74100817 100644
--- a/miasm/os_dep/common.py
+++ b/miasm/os_dep/common.py
@@ -71,15 +71,15 @@ class heap(object):
         self.addr &= self.mask ^ (self.align - 1)
         return ret
 
-    def alloc(self, jitter, size, perm=PAGE_READ | PAGE_WRITE):
+    def alloc(self, jitter, size, perm=PAGE_READ | PAGE_WRITE, cmt=""):
         """
         @jitter: a jitter instance
         @size: the size to allocate
         @perm: permission flags (see vm_alloc doc)
         """
-        return self.vm_alloc(jitter.vm, size, perm)
+        return self.vm_alloc(jitter.vm, size, perm=perm, cmt=cmt)
 
-    def vm_alloc(self, vm, size, perm=PAGE_READ | PAGE_WRITE):
+    def vm_alloc(self, vm, size, perm=PAGE_READ | PAGE_WRITE, cmt=""):
         """
         @vm: a VmMngr instance
         @size: the size to allocate
@@ -91,7 +91,7 @@ class heap(object):
             addr,
             perm,
             b"\x00" * (size),
-            "Heap alloc by %s" % get_caller_name(2)
+            "Heap alloc by %s %s" % (get_caller_name(2), cmt)
         )
         return addr
 
diff --git a/miasm/os_dep/linux/syscall.py b/miasm/os_dep/linux/syscall.py
index 3b1275aa..acebe2cb 100644
--- a/miasm/os_dep/linux/syscall.py
+++ b/miasm/os_dep/linux/syscall.py
@@ -681,7 +681,7 @@ def sys_x86_64_connect(jitter, linux_env):
     log.debug("sys_connect(%x, %r, %x)", fd, raddr, addrlen)
 
     # Stub
-    # Always refuse the connexion
+    # Always refuse the connection
     jitter.cpu.RAX = -1
 
 
diff --git a/miasm/os_dep/win_api_x86_32.py b/miasm/os_dep/win_api_x86_32.py
index c1870d97..568a646d 100644
--- a/miasm/os_dep/win_api_x86_32.py
+++ b/miasm/os_dep/win_api_x86_32.py
@@ -157,6 +157,9 @@ class c_winobjs(object):
         self.cryptcontext_num = 0
         self.cryptcontext = {}
         self.phhash_crypt_md5 = 0x55555
+        # key used by EncodePointer and DecodePointer
+        # (kernel32)
+        self.ptr_encode_key = 0xabababab
         self.files_hwnd = {}
         self.windowlong_dw = 0x77700
         self.module_cur_hwnd = 0x88800
@@ -272,7 +275,7 @@ class mdl(object):
 
 def kernel32_HeapAlloc(jitter):
     ret_ad, args = jitter.func_args_stdcall(["heap", "flags", "size"])
-    alloc_addr = winobjs.heap.alloc(jitter, args.size)
+    alloc_addr = winobjs.heap.alloc(jitter, args.size, cmt=hex(ret_ad))
     jitter.func_ret_stdcall(ret_ad, alloc_addr)
 
 
@@ -420,6 +423,36 @@ def kernel32_CloseHandle(jitter):
     ret_ad, _ = jitter.func_args_stdcall(["hwnd"])
     jitter.func_ret_stdcall(ret_ad, 1)
 
+def kernel32_EncodePointer(jitter):
+    """
+        PVOID EncodePointer(
+            _In_ PVOID Ptr
+        );
+
+        Emulated by XOR-ing the single stdcall argument with the fixed key
+        winobjs.ptr_encode_key and handing the result back to the caller.
+        XOR is its own inverse, so applying the same key a second time
+        restores the original pointer value.
+
+    """
+    ret_ad, args = jitter.func_args_stdcall(1)
+    ptr = args[0]
+    encoded = ptr ^ winobjs.ptr_encode_key
+    jitter.func_ret_stdcall(ret_ad, encoded)
+    return True
+
+def kernel32_DecodePointer(jitter):
+    """
+        PVOID DecodePointer(
+           PVOID Ptr
+        );
+
+        Return the argument XOR-ed with winobjs.ptr_encode_key.
+    """
+    ret_ad, args = jitter.func_args_stdcall(1)
+    encoded = args[0]
+    jitter.func_ret_stdcall(ret_ad, encoded ^ winobjs.ptr_encode_key)
+    return True
 
 def user32_GetForegroundWindow(jitter):
     ret_ad, _ = jitter.func_args_stdcall(0)
@@ -505,7 +538,7 @@ def advapi32_CryptHashData(jitter):
 
     data = jitter.vm.get_mem(args.pbdata, args.dwdatalen)
     log.debug('will hash %X', args.dwdatalen)
-    log.debug(repr(data[:10]) + "...")
+    log.debug(repr(data[:0x10]) + "...")
     winobjs.cryptcontext[args.hhash].h.update(data)
     jitter.func_ret_stdcall(ret_ad, 1)
 
@@ -518,12 +551,18 @@ def advapi32_CryptGetHashParam(jitter):
         raise ValueError("unknown crypt context")
 
     if args.param == 2:
+        # HP_HASHVAL
         # XXX todo: save h state?
         h = winobjs.cryptcontext[args.hhash].h.digest()
+        jitter.vm.set_mem(args.pbdata, h)
+        jitter.vm.set_u32(args.dwdatalen, len(h))
+    elif args.param == 4:
+        # HP_HASHSIZE
+        ret = winobjs.cryptcontext[args.hhash].h.digest_size
+        jitter.vm.set_u32(args.pbdata, ret)
+        jitter.vm.set_u32(args.dwdatalen, 4)
     else:
         raise ValueError('not impl', args.param)
-    jitter.vm.set_mem(args.pbdata, h)
-    jitter.vm.set_u32(args.dwdatalen, len(h))
 
     jitter.func_ret_stdcall(ret_ad, 1)
 
@@ -606,7 +645,7 @@ def kernel32_CreateFile(jitter, funcname, get_str):
                         h = open(sb_fname, 'r+b')
                         ret = winobjs.handle_pool.add(sb_fname, h)
                 else:
-                    log.warning("FILE %r DOES NOT EXIST!", fname)
+                    log.warning("FILE %r (%s) DOES NOT EXIST!", fname, sb_fname)
             elif args.dwcreationdisposition == 1:
                 # create new
                 if os.access(sb_fname, os.R_OK):
@@ -759,11 +798,13 @@ def kernel32_VirtualProtect(jitter):
         jitter.vm.set_u32(args.lpfloldprotect, ACCESS_DICT_INV[old])
 
     paddr = args.lpvoid - (args.lpvoid % winobjs.alloc_align)
-    psize = args.dwsize
+    paddr_max = (args.lpvoid + args.dwsize + winobjs.alloc_align - 1)
+    paddr_max_round = paddr_max - (paddr_max % winobjs.alloc_align)
+    psize = paddr_max_round - paddr
     for addr, items in list(winobjs.allocated_pages.items()):
         alloc_addr, alloc_size = items
-        if not (alloc_addr <= paddr and
-                paddr + psize <= alloc_addr + alloc_size):
+        if (paddr + psize <= alloc_addr or
+            paddr > alloc_addr + alloc_size):
             continue
         size = jitter.vm.get_all_memory()[addr]["size"]
         # Page is included in Protect area
@@ -1112,21 +1153,22 @@ def kernel32_GetCommandLineW(jitter):
 def shell32_CommandLineToArgvW(jitter):
     ret_ad, args = jitter.func_args_stdcall(["pcmd", "pnumargs"])
     cmd = get_win_str_w(jitter, args.pcmd)
+    if cmd.startswith('"') and cmd.endswith('"'):
+        cmd = cmd[1:-1]
     log.info("CommandLineToArgv %r", cmd)
     tks = cmd.split(' ')
     addr = winobjs.heap.alloc(jitter, len(cmd) * 2 + 4 * len(tks))
     addr_ret = winobjs.heap.alloc(jitter, 4 * (len(tks) + 1))
     o = 0
     for i, t in enumerate(tks):
-        jitter.set_win_str_w(addr + o, t)
+        set_win_str_w(jitter, addr + o, t)
         jitter.vm.set_u32(addr_ret + 4 * i, addr + o)
         o += len(t)*2 + 2
 
-    jitter.vm.set_u32(addr_ret + 4 * i, 0)
+    jitter.vm.set_u32(addr_ret + 4 * (i+1), 0)
     jitter.vm.set_u32(args.pnumargs, len(tks))
     jitter.func_ret_stdcall(ret_ad, addr_ret)
 
-
 def cryptdll_MD5Init(jitter):
     ret_ad, args = jitter.func_args_stdcall(["ad_ctx"])
     index = len(winobjs.cryptdll_md5_h)
@@ -1333,7 +1375,7 @@ def ntoskrnl_RtlGetVersion(jitter):
                     0x2,  # min vers
                     0x666,  # build nbr
                     0x2,   # platform id
-                    ) + jitter.set_win_str_w("Service pack 4")
+                    ) + encode_win_str_w("Service pack 4")
 
     jitter.vm.set_mem(args.ptr_version, s)
     jitter.func_ret_stdcall(ret_ad, 0)
@@ -1519,7 +1561,7 @@ def kernel32_lstrcpy(jitter):
 def msvcrt__mbscpy(jitter):
     ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2"])
     s2 = get_win_str_w(jitter, args.ptr_str2)
-    jitter.set_win_str_w(args.ptr_str1, s2)
+    set_win_str_w(jitter, args.ptr_str1, s2)
     jitter.func_ret_cdecl(ret_ad, args.ptr_str1)
 
 def msvcrt_wcscpy(jitter):
@@ -1533,7 +1575,7 @@ def kernel32_lstrcpyn(jitter):
     if len(s2) >= args.mlen:
         s2 = s2[:args.mlen - 1]
     log.info("Copy '%r'", s2)
-    jitter.set_win_str_a(args.ptr_str1, s2)
+    set_win_str_a(jitter, args.ptr_str1, s2)
     jitter.func_ret_stdcall(ret_ad, args.ptr_str1)
 
 
@@ -1628,15 +1670,82 @@ def kernel32_GetVolumeInformationW(jitter):
 
 
 def kernel32_MultiByteToWideChar(jitter):
+    MB_ERR_INVALID_CHARS = 0x8
+    CP_ACP  = 0x000
+    CP_1252 = 0x4e4
+
     ret_ad, args = jitter.func_args_stdcall(["codepage", "dwflags",
                                              "lpmultibytestr",
                                              "cbmultibyte",
                                              "lpwidecharstr",
                                              "cchwidechar"])
-    src = get_win_str_a(jitter, args.lpmultibytestr)
-    l = len(src) + 1
-    set_win_str_w(jitter, args.lpwidecharstr, src)
-    jitter.func_ret_stdcall(ret_ad, l)
+    if args.codepage != CP_ACP and args.codepage != CP_1252:
+        raise NotImplementedError
+    # according to MSDN:
+    # "Note that, if cbMultiByte is 0, the function fails."
+    if args.cbmultibyte == 0:
+        raise ValueError
+    # according to MSDN, cbMultiByte == -1 means the string is
+    # null-terminated; the terminator is then converted as well and is
+    # counted in the return value.
+    if args.cbmultibyte == 0xffffffff:
+        src_len = 0
+        while jitter.vm.get_mem(args.lpmultibytestr + src_len, 1) != b'\0':
+            src_len += 1
+        src = jitter.vm.get_mem(args.lpmultibytestr, src_len + 1)
+    else:
+        src = jitter.vm.get_mem(args.lpmultibytestr, args.cbmultibyte)
+    if args.dwflags & MB_ERR_INVALID_CHARS:
+        # strict decoding: raises UnicodeDecodeError on an invalid byte
+        s = src.decode("cp1252").encode("utf-16le")
+    else:
+        # silently replace undecodable chars with U+FFFD
+        s = src.decode("cp1252", errors="replace").encode("utf-16le")
+    if args.cchwidechar > 0:
+        # cchWideChar and the return value count wide chars (2 bytes each)
+        retval = min(args.cchwidechar, len(s) // 2)
+        jitter.vm.set_mem(args.lpwidecharstr, s[:retval * 2])
+    else:
+        # return value is the number of wide chars required,
+        # i.e., size of dest. buffer to allocate
+        retval = len(s) // 2
+    jitter.func_ret_stdcall(ret_ad, retval)
+
+
+def kernel32_WideCharToMultiByte(jitter):
+    """
+        int WideCharToMultiByte(
+          UINT                               CodePage,
+          DWORD                              dwFlags,
+          _In_NLS_string_(cchWideChar)LPCWCH lpWideCharStr,
+          int                                cchWideChar,
+          LPSTR                              lpMultiByteStr,
+          int                                cbMultiByte,
+          LPCCH                              lpDefaultChar,
+          LPBOOL                             lpUsedDefaultChar
+        );
+    """
+    CP_ACP  = 0x000
+    CP_1252 = 0x4e4
+    ret, args = jitter.func_args_stdcall([
+        'CodePage', 'dwFlags', 'lpWideCharStr', 'cchWideChar',
+        'lpMultiByteStr', 'cbMultiByte', 'lpDefaultChar', 'lpUsedDefaultChar'])
+    if args.CodePage != CP_ACP and args.CodePage != CP_1252:
+        raise NotImplementedError
+    src_len = args.cchWideChar * 2
+    if args.cchWideChar == 0xffffffff:
+        # -1: input is null-terminated; scan up to and including the 16-bit NUL
+        src_len = 2
+        while jitter.vm.get_mem(args.lpWideCharStr + src_len - 2, 2) != b'\0\0':
+            src_len += 2
+    src = jitter.vm.get_mem(args.lpWideCharStr, src_len)
+    dst = src.decode("utf-16le").encode("cp1252", errors="replace")
+    retval = len(dst)  # buffer size to allocate when cbMultiByte is 0
+    if args.cbMultiByte > 0:
+        # copy what fits and return the number of bytes written
+        retval = min(args.cbMultiByte, retval)
+        jitter.vm.set_mem(args.lpMultiByteStr, dst[:retval])
+    jitter.func_ret_stdcall(ret, retval)
 
 
 def my_GetEnvironmentVariable(jitter, funcname, get_str, set_str, mylen):
@@ -1870,6 +1979,7 @@ def ntdll_LdrLoadDll(jitter):
     libname = s.lower()
 
     ad = winobjs.runtime_dll.lib_get_add_base(libname)
+    log.info("Loading %r ret 0x%x", s, ad)
     jitter.vm.set_u32(args.modhandle, ad)
 
     jitter.func_ret_stdcall(ret_ad, 0)
@@ -1911,7 +2021,7 @@ def msvcrt_memset(jitter):
 def msvcrt_strrchr(jitter):
     ret_ad, args = jitter.func_args_cdecl(['pstr','c'])
     s = get_win_str_a(jitter, args.pstr)
-    c = int_to_byte(args.c)
+    c = int_to_byte(args.c).decode()
     ret = args.pstr + s.rfind(c)
     log.info("strrchr(%x '%s','%s') = %x" % (args.pstr,s,c,ret))
     jitter.func_ret_cdecl(ret_ad, ret)
@@ -1919,7 +2029,7 @@ def msvcrt_strrchr(jitter):
 def msvcrt_wcsrchr(jitter):
     ret_ad, args = jitter.func_args_cdecl(['pstr','c'])
     s = get_win_str_w(jitter, args.pstr)
-    c = int_to_byte(args.c)
+    c = int_to_byte(args.c).decode()
     ret = args.pstr + (s.rfind(c)*2)
     log.info("wcsrchr(%x '%s',%s) = %x" % (args.pstr,s,c,ret))
     jitter.func_ret_cdecl(ret_ad, ret)
@@ -2339,13 +2449,88 @@ def user32_GetKeyboardType(jitter):
 
     jitter.func_ret_stdcall(ret_ad, ret)
 
+    
+class startupinfo(object):
+    """
+        typedef struct _STARTUPINFOA {
+          /* 00000000 */ DWORD  cb;
+          /* 00000004 */ LPSTR  lpReserved;
+          /* 00000008 */ LPSTR  lpDesktop;
+          /* 0000000C */ LPSTR  lpTitle;
+          /* 00000010 */ DWORD  dwX;
+          /* 00000014 */ DWORD  dwY;
+          /* 00000018 */ DWORD  dwXSize;
+          /* 0000001C */ DWORD  dwYSize;
+          /* 00000020 */ DWORD  dwXCountChars;
+          /* 00000024 */ DWORD  dwYCountChars;
+          /* 00000028 */ DWORD  dwFillAttribute;
+          /* 0000002C */ DWORD  dwFlags;
+          /* 00000030 */ WORD   wShowWindow;
+          /* 00000032 */ WORD   cbReserved2;
+          /* 00000034 */ LPBYTE lpReserved2;
+          /* 00000038 */ HANDLE hStdInput;
+          /* 0000003C */ HANDLE hStdOutput;
+          /* 00000040 */ HANDLE hStdError;
+        } STARTUPINFOA, *LPSTARTUPINFOA;
+
+    """
+    # TODO: fill with relevant values
+    # for now every field is a zeroed placeholder; pack() serializes them
+    cb = 0x0
+    lpReserved = 0x0
+    lpDesktop = 0x0
+    lpTitle = 0x0
+    dwX = 0x0
+    dwY = 0x0
+    dwXSize = 0x0
+    dwYSize = 0x0
+    dwXCountChars = 0x0
+    dwYCountChars = 0x0
+    dwFillAttribute = 0x0
+    dwFlags = 0x0
+    wShowWindow = 0x0
+    cbReserved2 = 0x0
+    lpReserved2 = 0x0
+    hStdInput = 0x0
+    hStdOutput = 0x0
+    hStdError = 0x0
+
+    def pack(self):
+        return struct.pack('<IIIIIIIIIIIIHHIIII',  # '<': packed little-endian, host-independent
+                self.cb,
+                self.lpReserved,
+                self.lpDesktop,
+                self.lpTitle,
+                self.dwX,
+                self.dwY,
+                self.dwXSize,
+                self.dwYSize,
+                self.dwXCountChars,
+                self.dwYCountChars,
+                self.dwFillAttribute,
+                self.dwFlags,
+                self.wShowWindow,
+                self.cbReserved2,
+                self.lpReserved2,
+                self.hStdInput,
+                self.hStdOutput,
+                self.hStdError)
+
 
 def kernel32_GetStartupInfo(jitter, funcname, set_str):
-    ret_ad, args = jitter.func_args_stdcall(["ptr"])
+    """
+        void GetStartupInfo(
+          LPSTARTUPINFOW lpStartupInfo
+        );
 
-    s = b"\x00" * 0x2c + b"\x81\x00\x00\x00" + b"\x0a"
+        Retrieves the contents of the STARTUPINFO structure that was specified
+        when the calling process was created.
+        
+        https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getstartupinfow
 
-    jitter.vm.set_mem(args.ptr, s)
+    """
+    ret_ad, args = jitter.func_args_stdcall(["ptr"])
+    jitter.vm.set_mem(args.ptr, startupinfo().pack())
     jitter.func_ret_stdcall(ret_ad, args.ptr)
 
 
@@ -2877,7 +3062,7 @@ class win32_find_data(object):
         for k, v in viewitems(kargs):
             setattr(self, k, v)
 
-    def toStruct(self):
+    def toStruct(self, encode_str=encode_win_str_w):
         s = struct.pack('=IQQQIIII',
                         self.fileattrib,
                         self.creationtime,
@@ -2887,10 +3072,10 @@ class win32_find_data(object):
                         self.filesizelow,
                         self.dwreserved0,
                         self.dwreserved1)
-        fname = self.cfilename.encode('utf-8') + b'\x00' * MAX_PATH
+        fname = encode_str(self.cfilename) + b'\x00' * MAX_PATH
         fname = fname[:MAX_PATH]
         s += fname
-        fname = self.alternamefilename.encode('utf-8') + b'\x00' * 14
+        fname = encode_str(self.alternamefilename) + b'\x00' * 14
         fname = fname[:14]
         s += fname
         return s
@@ -2927,33 +3112,66 @@ class find_data_mngr(object):
 
         return fname
 
-
-def kernel32_FindFirstFileA(jitter):
-    ret_ad, args = jitter.func_args_stdcall(["pfilepattern", "pfindfiledata"])
-
-    filepattern = get_win_str_a(jitter, args.pfilepattern)
+def my_FindFirstFile(jitter, pfilepattern, pfindfiledata, get_win_str, encode_str):
+    filepattern = get_win_str(jitter, pfilepattern)
     h = winobjs.find_data.findfirst(filepattern)
 
     fname = winobjs.find_data.findnext(h)
     fdata = win32_find_data(cfilename=fname)
 
-    jitter.vm.set_mem(args.pfindfiledata, fdata.toStruct())
+    jitter.vm.set_mem(pfindfiledata, fdata.toStruct(encode_str=encode_str))
+    return h
+
+def kernel32_FindFirstFileA(jitter):
+    ret_ad, args = jitter.func_args_stdcall(["pfilepattern", "pfindfiledata"])
+    h = my_FindFirstFile(jitter, args.pfilepattern, args.pfindfiledata,
+                           get_win_str_a, encode_win_str_a)
     jitter.func_ret_stdcall(ret_ad, h)
 
+def kernel32_FindFirstFileW(jitter):
+    ret_ad, args = jitter.func_args_stdcall(["pfilepattern", "pfindfiledata"])
+    h = my_FindFirstFile(jitter, args.pfilepattern, args.pfindfiledata,
+                           get_win_str_w, encode_win_str_w)
+    jitter.func_ret_stdcall(ret_ad, h)
 
-def kernel32_FindNextFileA(jitter):
-    ret_ad, args = jitter.func_args_stdcall(["handle", "pfindfiledata"])
+def kernel32_FindFirstFileExA(jitter):
+    ret_ad, args = jitter.func_args_stdcall([
+        "lpFileName",
+        "fInfoLevelId",
+        "lpFindFileData",
+        "fSearchOp",
+        "lpSearchFilter",
+        "dwAdditionalFlags"])
+    h = my_FindFirstFile(jitter, args.lpFileName, args.lpFindFileData,
+                         get_win_str_a, encode_win_str_a)
+    jitter.func_ret_stdcall(ret_ad, h)
+
+def kernel32_FindFirstFileExW(jitter):
+    ret_ad, args = jitter.func_args_stdcall([
+        "lpFileName",
+        "fInfoLevelId",
+        "lpFindFileData",
+        "fSearchOp",
+        "lpSearchFilter",
+        "dwAdditionalFlags"])
+    h = my_FindFirstFile(jitter, args.lpFileName, args.lpFindFileData,
+                         get_win_str_w, encode_win_str_w)
+    jitter.func_ret_stdcall(ret_ad, h)
 
+def my_FindNextFile(jitter, encode_str):
+    ret_ad, args = jitter.func_args_stdcall(["handle", "pfindfiledata"])
     fname = winobjs.find_data.findnext(args.handle)
     if fname is None:
+        winobjs.lastwin32error = 0x12 # ERROR_NO_MORE_FILES
         ret = 0
     else:
         ret = 1
         fdata = win32_find_data(cfilename=fname)
-        jitter.vm.set_mem(args.pfindfiledata, fdata.toStruct())
-
+        jitter.vm.set_mem(args.pfindfiledata, fdata.toStruct(encode_str=encode_str))
     jitter.func_ret_stdcall(ret_ad, ret)
 
+kernel32_FindNextFileA = lambda jitter: my_FindNextFile(jitter, encode_win_str_a)
+kernel32_FindNextFileW = lambda jitter: my_FindNextFile(jitter, encode_win_str_w)
 
 def kernel32_GetNativeSystemInfo(jitter):
     ret_ad, args = jitter.func_args_stdcall(["sys_ptr"])
@@ -3080,7 +3298,7 @@ class FLS(object):
         '''
         DWORD FlsAlloc(
           PFLS_CALLBACK_FUNCTION lpCallback
-        );    
+        );
         '''
         ret_ad, args = jitter.func_args_stdcall(["lpCallback"])
         index = len(self.slots)
@@ -3097,7 +3315,7 @@ class FLS(object):
         ret_ad, args = jitter.func_args_stdcall(["dwFlsIndex", "lpFlsData"])
         self.slots[args.dwFlsIndex] = args.lpFlsData
         jitter.func_ret_stdcall(ret_ad, 1)
-        
+
     def kernel32_FlsGetValue(self, jitter):
         '''
         PVOID FlsGetValue(
@@ -3105,8 +3323,8 @@ class FLS(object):
         );
         '''
         ret_ad, args = jitter.func_args_stdcall(["dwFlsIndex"])
-        jitter.func_ret_stdcall(ret_ad, self.slots[args.dwFlsIndex])        
-        
+        jitter.func_ret_stdcall(ret_ad, self.slots[args.dwFlsIndex])
+
 fls = FLS()
 
 
@@ -3129,15 +3347,15 @@ def kernel32_GetStdHandle(jitter):
     HANDLE WINAPI GetStdHandle(
       _In_ DWORD nStdHandle
     );
-    
-    STD_INPUT_HANDLE (DWORD)-10 	
+
+    STD_INPUT_HANDLE (DWORD)-10
     The standard input device. Initially, this is the console input buffer, CONIN$.
 
-    STD_OUTPUT_HANDLE (DWORD)-11 	
+    STD_OUTPUT_HANDLE (DWORD)-11
     The standard output device. Initially, this is the active console screen buffer, CONOUT$.
 
-    STD_ERROR_HANDLE (DWORD)-12 	
-    The standard error device. Initially, this is the active console screen buffer, CONOUT$.    
+    STD_ERROR_HANDLE (DWORD)-12
+    The standard error device. Initially, this is the active console screen buffer, CONOUT$.
     '''
     ret_ad, args = jitter.func_args_stdcall(["nStdHandle"])
     jitter.func_ret_stdcall(ret_ad, {
@@ -3146,7 +3364,7 @@ def kernel32_GetStdHandle(jitter):
         STD_INPUT_HANDLE: 3,
     }[args.nStdHandle])
 
-    
+
 FILE_TYPE_UNKNOWN = 0x0000
 FILE_TYPE_CHAR = 0x0002
 
@@ -3226,13 +3444,13 @@ def kernel32_IsProcessorFeaturePresent(jitter):
         17: False,
     }[args.ProcessorFeature])
 
-    
+
 def kernel32_GetACP(jitter):
     '''
     UINT GetACP();
     '''
     ret_ad, args = jitter.func_args_stdcall([])
-    # Windows-1252: Latin 1 / Western European  Superset of ISO-8859-1 (without C1 controls). 
+    # Windows-1252: Latin 1 / Western European  Superset of ISO-8859-1 (without C1 controls).
     jitter.func_ret_stdcall(ret_ad, 1252)
 
 
@@ -3257,7 +3475,7 @@ def kernel32_IsValidCodePage(jitter):
     );
     '''
     ret_ad, args = jitter.func_args_stdcall(["CodePage"])
-    jitter.func_ret_stdcall(ret_ad, args.CodePage in VALID_CODE_PAGES)    
+    jitter.func_ret_stdcall(ret_ad, args.CodePage in VALID_CODE_PAGES)
 
 
 def kernel32_GetCPInfo(jitter):
@@ -3270,8 +3488,102 @@ def kernel32_GetCPInfo(jitter):
     ret_ad, args = jitter.func_args_stdcall(["CodePage", "lpCPInfo"])
     assert args.CodePage == 1252
     # ref: http://www.rensselaer.org/dept/cis/software/g77-mingw32/include/winnls.h
-    #define MAX_LEADBYTES 	12
+    #define MAX_LEADBYTES       12
     #define MAX_DEFAULTCHAR	2
     jitter.vm.set_mem(args.lpCPInfo, struct.pack('<I', 0x1) + b'??' + b'\x00' * 12)
     jitter.func_ret_stdcall(ret_ad, 1)
-    
+
+
+def kernel32_GetStringTypeW(jitter):
+    """
+        BOOL GetStringTypeW(
+          DWORD                         dwInfoType,
+          _In_NLS_string_(cchSrc)LPCWCH lpSrcStr,
+          int                           cchSrc,
+          LPWORD                        lpCharType
+        );
+
+        Retrieves character type information for the characters in the specified
+        Unicode source string. For each character in the string, the function
+        sets one or more bits in the corresponding 16-bit element of the output
+        array. Each bit identifies a given character type, for example, letter,
+        digit, or neither.
+
+    """
+    # These types support ANSI C and POSIX (LC_CTYPE) character typing
+    # functions.A bitwise-OR of these values is retrieved in the array in the
+    # output buffer when dwInfoType is set to CT_CTYPE1. For DBCS locales, the
+    # type attributes apply to both narrow characters and wide characters. The
+    # Japanese hiragana and katakana characters, and the kanji ideograph
+    # characters all have the C1_ALPHA attribute.
+    CT_TYPE1 = 0x01
+    # TODO handle other types of information
+    # (CT_TYPE2, CT_TYPE3)
+    # for now, they raise NotImplemented
+    CT_TYPE2 = 0x02
+    CT_TYPE3 = 0x03
+
+    C1_UPPER   = 0x0001  # Uppercase
+    C1_LOWER   = 0x0002  # Lowercase
+    C1_DIGIT   = 0x0004  # Decimal digits
+    C1_SPACE   = 0x0008  # Space characters
+    C1_PUNCT   = 0x0010  # Punctuation
+    C1_CNTRL   = 0x0020  # Control characters
+    C1_BLANK   = 0x0040  # Blank characters
+    C1_XDIGIT  = 0x0080  # Hexadecimal digits
+    C1_ALPHA   = 0x0100  # Any linguistic character: alphabetical, syllabary, or ideographic
+    C1_DEFINED = 0x0200  # A defined character, but not one of the other C1_* types
+
+    # the following sets have been generated from the Linux python library curses
+    # e.g., C1_PUNCT_SET = [chr(i) for i in range(256) if curses.ascii.ispunct(chr(i))]
+    C1_PUNCT_SET = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',',
+            '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']',
+            '^', '_', '`', '{', '|', '}', '~']
+    C1_CNTRL_SET = ['\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06',
+            '\x07', '\x08', '\t', '\n', '\x0b', '\x0c', '\r', '\x0e', '\x0f',
+            '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+            '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+            '\x7f']
+    C1_BLANK_SET = ['\t', ' ']
+    C1_XDIGIT_SET = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
+            'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f']
+
+    ret, args = jitter.func_args_stdcall(['dwInfoType', 'lpSrcStr', 'cchSrc',
+        'lpCharType'])
+    s = jitter.vm.get_mem(args.lpSrcStr, args.cchSrc).decode("utf-16")
+    if args.dwInfoType == CT_TYPE1:
+        # iterate over characters from the decoded W string
+        for i, c in enumerate(s):
+            # TODO handle non-ascii characters
+            if not c.isascii():
+                continue
+            val = 0
+            if c.isupper():
+                val |= C1_UPPER
+            if c.islower():
+                val |= C1_LOWER
+            if c.isdigit():
+                val |= C1_DIGIT
+            if c.isspace():
+                val |= C1_SPACE
+            if c in C1_PUNCT_SET:
+                val |= C1_PUNCT
+            if c in C1_CNTRL_SET:
+                val |= C1_CNTRL
+            if c in C1_BLANK_SET:
+                val |= C1_BLANK
+            if c in C1_XDIGIT_SET:
+                val |= C1_XDIGIT
+            if c.isalpha():
+                val |= C1_ALPHA
+            if val == 0:
+                val = C1_DEFINED
+            jitter.vm.set_u16(args.lpCharType + i * 2, val)
+    elif args.dwInfoType == CT_TYPE2:
+        raise NotImplemented
+    elif args.dwInfoType == CT_TYPE3:
+        raise NotImplemented
+    else:
+        raise ValueError("CT_TYPE unknown: %i" % args.dwInfoType)
+    jitter.func_ret_stdcall(ret, 1)
+    return True
diff --git a/miasm/os_dep/win_api_x86_32_seh.py b/miasm/os_dep/win_api_x86_32_seh.py
index 28699d68..57416477 100644
--- a/miasm/os_dep/win_api_x86_32_seh.py
+++ b/miasm/os_dep/win_api_x86_32_seh.py
@@ -189,18 +189,23 @@ def build_ldr_data(jitter, modules_info):
         "Loader struct"
     )  # (ldrdata.get_size() - offset))
 
+    last_module = modules_info.module2entry[
+        modules_info.modules[-1]]
+
     if main_pe:
         ldrdata.InLoadOrderModuleList.flink = main_addr_entry
-        ldrdata.InLoadOrderModuleList.blink = 0
+        ldrdata.InLoadOrderModuleList.blink = last_module
+
 
         ldrdata.InMemoryOrderModuleList.flink = main_addr_entry + \
             LdrDataEntry.get_type().get_offset("InMemoryOrderLinks")
-        ldrdata.InMemoryOrderModuleList.blink = 0
-
+        ldrdata.InMemoryOrderModuleList.blink = last_module + \
+            LdrDataEntry.get_type().get_offset("InMemoryOrderLinks")
     if ntdll_pe:
         ldrdata.InInitializationOrderModuleList.flink = ntdll_addr_entry + \
             LdrDataEntry.get_type().get_offset("InInitializationOrderLinks")
-        ldrdata.InInitializationOrderModuleList.blink = 0
+        ldrdata.InInitializationOrderModuleList.blink = last_module + \
+                LdrDataEntry.get_type().get_offset("InInitializationOrderLinks")
 
     # Add dummy dll base
     jitter.vm.add_memory_page(peb_ldr_data_address + 0x24,
@@ -312,9 +317,11 @@ def set_link_list_entry(jitter, loaded_modules, modules_info, offset):
             prev_module_entry = peb_ldr_data_address + 0xC
         if i == len(loaded_modules) - 1:
             next_module_entry = peb_ldr_data_address + 0xC
-        jitter.vm.set_mem(cur_module_entry + offset,
-                          (pck32(next_module_entry + offset) +
-                           pck32(prev_module_entry + offset)))
+
+        list_entry = ListEntry(jitter.vm, cur_module_entry + offset)
+        list_entry.flink = next_module_entry + offset
+        list_entry.blink = prev_module_entry + offset
+
 
 
 def fix_InLoadOrderModuleList(jitter, modules_info):
diff --git a/miasm/runtime/divti3.c b/miasm/runtime/divti3.c
new file mode 100644
index 00000000..fc5c1b4d
--- /dev/null
+++ b/miasm/runtime/divti3.c
@@ -0,0 +1,36 @@
+/* ===-- divti3.c - Implement __divti3 -------------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if __x86_64
+
+#include "int_lib.h"
+#include "export.h"
+
+tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
+
+/* Returns: a / b */
+
+_MIASM_EXPORT ti_int
+__divti3(ti_int a, ti_int b)
+{
+    const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+    ti_int s_a = a >> bits_in_tword_m1;           /* s_a = a < 0 ? -1 : 0 */
+    ti_int s_b = b >> bits_in_tword_m1;           /* s_b = b < 0 ? -1 : 0 */
+    a = (a ^ s_a) - s_a;                         /* negate if s_a == -1 */
+    b = (b ^ s_b) - s_b;                         /* negate if s_b == -1 */
+    s_a ^= s_b;                                  /* sign of quotient */
+    return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a;  /* negate if s_a == -1 */
+}
+
+#endif
diff --git a/miasm/runtime/export.h b/miasm/runtime/export.h
new file mode 100644
index 00000000..f21a83a8
--- /dev/null
+++ b/miasm/runtime/export.h
@@ -0,0 +1,10 @@
+#ifndef MIASM_RT_EXPORT_H
+#define MIASM_RT_EXPORT_H
+
+#ifdef _WIN32
+#define _MIASM_EXPORT __declspec(dllexport)
+#else
+#define _MIASM_EXPORT __attribute__((visibility("default")))
+#endif
+
+#endif
diff --git a/miasm/runtime/int_endianness.h b/miasm/runtime/int_endianness.h
new file mode 100644
index 00000000..def046c3
--- /dev/null
+++ b/miasm/runtime/int_endianness.h
@@ -0,0 +1,114 @@
+//===-- int_endianness.h - configuration header for compiler-rt -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a configuration header for compiler-rt.
+// This file is not part of the interface of this library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_ENDIANNESS_H
+#define INT_ENDIANNESS_H
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) &&                \
+    defined(__ORDER_LITTLE_ENDIAN__)
+
+// Clang and GCC provide built-in endianness definitions.
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif // __BYTE_ORDER__
+
+#else // Compilers other than Clang or GCC.
+
+#if defined(__SVR4) && defined(__sun)
+#include <sys/byteorder.h>
+
+#if defined(_BIG_ENDIAN)
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif defined(_LITTLE_ENDIAN)
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#else // !_LITTLE_ENDIAN
+#error "unknown endianness"
+#endif // !_LITTLE_ENDIAN
+
+#endif // Solaris and AuroraUX.
+
+// ..
+
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) ||   \
+    defined(__minix)
+#include <sys/endian.h>
+
+#if _BYTE_ORDER == _BIG_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif _BYTE_ORDER == _LITTLE_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif // _BYTE_ORDER
+
+#endif // *BSD
+
+#if defined(__OpenBSD__)
+#include <machine/endian.h>
+
+#if _BYTE_ORDER == _BIG_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif _BYTE_ORDER == _LITTLE_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif // _BYTE_ORDER
+
+#endif // OpenBSD
+
+// ..
+
+// Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the
+// compiler (at least with GCC)
+#if defined(__APPLE__) || defined(__ellcc__)
+
+#ifdef __BIG_ENDIAN__
+#if __BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#endif
+#endif // __BIG_ENDIAN__
+
+#ifdef __LITTLE_ENDIAN__
+#if __LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif
+#endif // __LITTLE_ENDIAN__
+
+#endif // Mac OSX
+
+// ..
+
+#if defined(_WIN32)
+
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+
+#endif // Windows
+
+#endif // Clang or GCC.
+
+// .
+
+#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN)
+#error Unable to determine endian
+#endif // Check we found an endianness correctly.
+
+#endif // INT_ENDIANNESS_H
diff --git a/miasm/runtime/int_lib.h b/miasm/runtime/int_lib.h
new file mode 100644
index 00000000..7f5eb799
--- /dev/null
+++ b/miasm/runtime/int_lib.h
@@ -0,0 +1,148 @@
+//===-- int_lib.h - configuration header for compiler-rt  -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a configuration header for compiler-rt.
+// This file is not part of the interface of this library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_LIB_H
+#define INT_LIB_H
+
+// Assumption: Signed integral is 2's complement.
+// Assumption: Right shift of signed negative is arithmetic shift.
+// Assumption: Endianness is little or big (not mixed).
+
+// ABI macro definitions
+
+#if __ARM_EABI__
+#ifdef COMPILER_RT_ARMHF_TARGET
+#define COMPILER_RT_ABI
+#else
+#define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
+#endif
+#else
+#define COMPILER_RT_ABI
+#endif
+
+#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
+#define NORETURN __declspec(noreturn)
+#define UNUSED
+#else
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#endif
+
+#define STR(a) #a
+#define XSTR(a) STR(a)
+#define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name
+
+#if defined(__ELF__) || defined(__MINGW32__) || defined(__wasm__)
+#define COMPILER_RT_ALIAS(name, aliasname) \
+  COMPILER_RT_ABI __typeof(name) aliasname __attribute__((__alias__(#name)));
+#elif defined(__APPLE__)
+#if defined(VISIBILITY_HIDDEN)
+#define COMPILER_RT_ALIAS_VISIBILITY(name) \
+  __asm__(".private_extern " SYMBOL_NAME(name));
+#else
+#define COMPILER_RT_ALIAS_VISIBILITY(name)
+#endif
+#define COMPILER_RT_ALIAS(name, aliasname) \
+  __asm__(".globl " SYMBOL_NAME(aliasname)); \
+  COMPILER_RT_ALIAS_VISIBILITY(aliasname) \
+  __asm__(SYMBOL_NAME(aliasname) " = " SYMBOL_NAME(name)); \
+  COMPILER_RT_ABI __typeof(name) aliasname;
+#elif defined(_WIN32)
+#define COMPILER_RT_ALIAS(name, aliasname)
+#else
+#error Unsupported target
+#endif
+
+#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE))
+//
+// Kernel and boot environment can't use normal headers,
+// so use the equivalent system headers.
+//
+#include <machine/limits.h>
+#include <sys/stdint.h>
+#include <sys/types.h>
+#else
+// Include the standard compiler builtin headers we use functionality from.
+#include <float.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#endif
+
+// Include the commonly used internal type definitions.
+#include "int_types.h"
+
+// Include internal utility function declarations.
+#include "int_util.h"
+
+COMPILER_RT_ABI si_int __paritysi2(si_int a);
+COMPILER_RT_ABI si_int __paritydi2(di_int a);
+
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
+COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
+COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
+
+COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem);
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem);
+#ifdef CRT_HAS_128BIT
+COMPILER_RT_ABI si_int __clzti2(ti_int a);
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem);
+#endif
+
+// Definitions for builtins unavailable on MSVC
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+
+uint32_t __inline __builtin_ctz(uint32_t value) {
+  unsigned long trailing_zero = 0;
+  if (_BitScanForward(&trailing_zero, value))
+    return trailing_zero;
+  return 32;
+}
+
+uint32_t __inline __builtin_clz(uint32_t value) {
+  unsigned long leading_zero = 0;
+  if (_BitScanReverse(&leading_zero, value))
+    return 31 - leading_zero;
+  return 32;
+}
+
+#if defined(_M_ARM) || defined(_M_X64)
+uint32_t __inline __builtin_clzll(uint64_t value) {
+  unsigned long leading_zero = 0;
+  if (_BitScanReverse64(&leading_zero, value))
+    return 63 - leading_zero;
+  return 64;
+}
+#else
+uint32_t __inline __builtin_clzll(uint64_t value) {
+  if (value == 0)
+    return 64;
+  uint32_t msh = (uint32_t)(value >> 32);
+  uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF);
+  if (msh != 0)
+    return __builtin_clz(msh);
+  return 32 + __builtin_clz(lsh);
+}
+#endif
+
+#define __builtin_clzl __builtin_clzll
+#endif // defined(_MSC_VER) && !defined(__clang__)
+
+#endif // INT_LIB_H
diff --git a/miasm/runtime/int_types.h b/miasm/runtime/int_types.h
new file mode 100644
index 00000000..f89220d5
--- /dev/null
+++ b/miasm/runtime/int_types.h
@@ -0,0 +1,174 @@
+//===-- int_types.h - configuration header for compiler-rt  ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is not part of the interface of this library.
+//
+// This file defines various standard types, most importantly a number of unions
+// used to access parts of larger types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+#include "int_endianness.h"
+
+// si_int is defined in Linux sysroot's asm-generic/siginfo.h
+#ifdef si_int
+#undef si_int
+#endif
+typedef int si_int;
+typedef unsigned su_int;
+
+typedef long long di_int;
+typedef unsigned long long du_int;
+
+typedef union {
+  di_int all;
+  struct {
+#if _YUGA_LITTLE_ENDIAN
+    su_int low;
+    si_int high;
+#else
+    si_int high;
+    su_int low;
+#endif // _YUGA_LITTLE_ENDIAN
+  } s;
+} dwords;
+
+typedef union {
+  du_int all;
+  struct {
+#if _YUGA_LITTLE_ENDIAN
+    su_int low;
+    su_int high;
+#else
+    su_int high;
+    su_int low;
+#endif // _YUGA_LITTLE_ENDIAN
+  } s;
+} udwords;
+
+#if defined(__LP64__) || defined(__wasm__) || defined(__mips64) ||             \
+    defined(__riscv) || defined(_WIN64)
+#define CRT_HAS_128BIT
+#endif
+
+// MSVC doesn't have a working 128bit integer type. Users should really compile
+// compiler-rt with clang, but if they happen to be doing a standalone build for
+// asan or something else, disable the 128 bit parts so things sort of work.
+#if defined(_MSC_VER) && !defined(__clang__)
+#undef CRT_HAS_128BIT
+#endif
+
+#ifdef CRT_HAS_128BIT
+typedef int ti_int __attribute__((mode(TI)));
+typedef unsigned tu_int __attribute__((mode(TI)));
+
+typedef union {
+  ti_int all;
+  struct {
+#if _YUGA_LITTLE_ENDIAN
+    du_int low;
+    di_int high;
+#else
+    di_int high;
+    du_int low;
+#endif // _YUGA_LITTLE_ENDIAN
+  } s;
+} twords;
+
+typedef union {
+  tu_int all;
+  struct {
+#if _YUGA_LITTLE_ENDIAN
+    du_int low;
+    du_int high;
+#else
+    du_int high;
+    du_int low;
+#endif // _YUGA_LITTLE_ENDIAN
+  } s;
+} utwords;
+
+static __inline ti_int make_ti(di_int h, di_int l) {
+  twords r;
+  r.s.high = h;
+  r.s.low = l;
+  return r.all;
+}
+
+static __inline tu_int make_tu(du_int h, du_int l) {
+  utwords r;
+  r.s.high = h;
+  r.s.low = l;
+  return r.all;
+}
+
+#endif // CRT_HAS_128BIT
+
+typedef union {
+  su_int u;
+  float f;
+} float_bits;
+
+typedef union {
+  udwords u;
+  double f;
+} double_bits;
+
+typedef struct {
+#if _YUGA_LITTLE_ENDIAN
+  udwords low;
+  udwords high;
+#else
+  udwords high;
+  udwords low;
+#endif // _YUGA_LITTLE_ENDIAN
+} uqwords;
+
+// Check if the target supports 80 bit extended precision long doubles.
+// Notably, on x86 Windows, MSVC only provides a 64-bit long double, but GCC
+// still makes it 80 bits. Clang will match whatever compiler it is trying to
+// be compatible with.
+#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) ||      \
+    defined(__m68k__) || defined(__ia64__)
+#define HAS_80_BIT_LONG_DOUBLE 1
+#else
+#define HAS_80_BIT_LONG_DOUBLE 0
+#endif
+
+typedef union {
+  uqwords u;
+  long double f;
+} long_double_bits;
+
+#if __STDC_VERSION__ >= 199901L
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+#else
+typedef struct {
+  float real, imaginary;
+} Fcomplex;
+
+typedef struct {
+  double real, imaginary;
+} Dcomplex;
+
+typedef struct {
+  long double real, imaginary;
+} Lcomplex;
+
+#define COMPLEX_REAL(x) (x).real
+#define COMPLEX_IMAGINARY(x) (x).imaginary
+#endif
+#endif // INT_TYPES_H
diff --git a/miasm/runtime/int_util.h b/miasm/runtime/int_util.h
new file mode 100644
index 00000000..5fbdfb57
--- /dev/null
+++ b/miasm/runtime/int_util.h
@@ -0,0 +1,31 @@
+//===-- int_util.h - internal utility functions ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is not part of the interface of this library.
+//
+// This file defines non-inline utilities which are available for use in the
+// library. The function definitions themselves are all contained in int_util.c
+// which will always be compiled into any compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_UTIL_H
+#define INT_UTIL_H
+
+/// \brief Trigger a program abort (or panic for kernel code).
+#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__)
+
+NORETURN void __compilerrt_abort_impl(const char *file, int line,
+                                      const char *function);
+
+#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
+#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
+#define COMPILE_TIME_ASSERT2(expr, cnt)                                        \
+  typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
+
+#endif // INT_UTIL_H
diff --git a/miasm/runtime/udivmodti4.c b/miasm/runtime/udivmodti4.c
new file mode 100644
index 00000000..44a43be4
--- /dev/null
+++ b/miasm/runtime/udivmodti4.c
@@ -0,0 +1,196 @@
+//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __udivmodti4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include "export.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Effects: if rem != 0, *rem = a % b
+// Returns: a / b
+
+// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
+
+_MIASM_EXPORT tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
+  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
+  utwords n;
+  n.all = a;
+  utwords d;
+  d.all = b;
+  utwords q;
+  utwords r;
+  unsigned sr;
+  // special cases, X is unknown, K != 0
+  if (n.s.high == 0) {
+    if (d.s.high == 0) {
+      // 0 X
+      // ---
+      // 0 X
+      if (rem)
+        *rem = n.s.low % d.s.low;
+      return n.s.low / d.s.low;
+    }
+    // 0 X
+    // ---
+    // K X
+    if (rem)
+      *rem = n.s.low;
+    return 0;
+  }
+  // n.s.high != 0
+  if (d.s.low == 0) {
+    if (d.s.high == 0) {
+      // K X
+      // ---
+      // 0 0
+      if (rem)
+        *rem = n.s.high % d.s.low;
+      return n.s.high / d.s.low;
+    }
+    // d.s.high != 0
+    if (n.s.low == 0) {
+      // K 0
+      // ---
+      // K 0
+      if (rem) {
+        r.s.high = n.s.high % d.s.high;
+        r.s.low = 0;
+        *rem = r.all;
+      }
+      return n.s.high / d.s.high;
+    }
+    // K K
+    // ---
+    // K 0
+    if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
+      if (rem) {
+        r.s.low = n.s.low;
+        r.s.high = n.s.high & (d.s.high - 1);
+        *rem = r.all;
+      }
+      return n.s.high >> __builtin_ctzll(d.s.high);
+    }
+    // K K
+    // ---
+    // K 0
+    sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
+    // 0 <= sr <= n_udword_bits - 2 or sr large
+    if (sr > n_udword_bits - 2) {
+      if (rem)
+        *rem = n.all;
+      return 0;
+    }
+    ++sr;
+    // 1 <= sr <= n_udword_bits - 1
+    // q.all = n.all << (n_utword_bits - sr);
+    q.s.low = 0;
+    q.s.high = n.s.low << (n_udword_bits - sr);
+    // r.all = n.all >> sr;
+    r.s.high = n.s.high >> sr;
+    r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+  } else /* d.s.low != 0 */ {
+    if (d.s.high == 0) {
+      // K X
+      // ---
+      // 0 K
+      if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
+        if (rem)
+          *rem = n.s.low & (d.s.low - 1);
+        if (d.s.low == 1)
+          return n.all;
+        sr = __builtin_ctzll(d.s.low);
+        q.s.high = n.s.high >> sr;
+        q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+        return q.all;
+      }
+      // K X
+      // ---
+      // 0 K
+      sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) -
+           __builtin_clzll(n.s.high);
+      // 2 <= sr <= n_utword_bits - 1
+      // q.all = n.all << (n_utword_bits - sr);
+      // r.all = n.all >> sr;
+      if (sr == n_udword_bits) {
+        q.s.low = 0;
+        q.s.high = n.s.low;
+        r.s.high = 0;
+        r.s.low = n.s.high;
+      } else if (sr < n_udword_bits) /* 2 <= sr <= n_udword_bits - 1 */ {
+        q.s.low = 0;
+        q.s.high = n.s.low << (n_udword_bits - sr);
+        r.s.high = n.s.high >> sr;
+        r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+      } else /* n_udword_bits + 1 <= sr <= n_utword_bits - 1 */ {
+        q.s.low = n.s.low << (n_utword_bits - sr);
+        q.s.high = (n.s.high << (n_utword_bits - sr)) |
+                   (n.s.low >> (sr - n_udword_bits));
+        r.s.high = 0;
+        r.s.low = n.s.high >> (sr - n_udword_bits);
+      }
+    } else {
+      // K X
+      // ---
+      // K K
+      sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
+      // 0 <= sr <= n_udword_bits - 1 or sr large
+      if (sr > n_udword_bits - 1) {
+        if (rem)
+          *rem = n.all;
+        return 0;
+      }
+      ++sr;
+      // 1 <= sr <= n_udword_bits
+      // q.all = n.all << (n_utword_bits - sr);
+      // r.all = n.all >> sr;
+      q.s.low = 0;
+      if (sr == n_udword_bits) {
+        q.s.high = n.s.low;
+        r.s.high = 0;
+        r.s.low = n.s.high;
+      } else {
+        r.s.high = n.s.high >> sr;
+        r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+        q.s.high = n.s.low << (n_udword_bits - sr);
+      }
+    }
+  }
+  // Not a special case
+  // q and r are initialized with:
+  // q.all = n.all << (n_utword_bits - sr);
+  // r.all = n.all >> sr;
+  // 1 <= sr <= n_utword_bits - 1
+  su_int carry = 0;
+  for (; sr > 0; --sr) {
+    // r:q = ((r:q)  << 1) | carry
+    r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
+    r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
+    q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
+    q.s.low = (q.s.low << 1) | carry;
+    // carry = 0;
+    // if (r.all >= d.all)
+    // {
+    //     r.all -= d.all;
+    //      carry = 1;
+    // }
+    const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
+    carry = s & 1;
+    r.all -= d.all & s;
+  }
+  q.all = (q.all << 1) | carry;
+  if (rem)
+    *rem = r.all;
+  return q.all;
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/miasm/runtime/udivti3.c b/miasm/runtime/udivti3.c
new file mode 100644
index 00000000..3844dc9d
--- /dev/null
+++ b/miasm/runtime/udivti3.c
@@ -0,0 +1,24 @@
+//===-- udivti3.c - Implement __udivti3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __udivti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include "export.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: a / b
+
+_MIASM_EXPORT tu_int __udivti3(tu_int a, tu_int b) {
+  return __udivmodti4(a, b, 0);
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/optional_requirements.txt b/optional_requirements.txt
index 71ebdbe8..39d92a93 100644
--- a/optional_requirements.txt
+++ b/optional_requirements.txt
@@ -1,3 +1,3 @@
 pycparser
 z3-solver==4.8.7.0
-llvmlite==0.26.0
+llvmlite==0.31.0
diff --git a/setup.py b/setup.py
index 4b149772..a20cf9a1 100644
--- a/setup.py
+++ b/setup.py
@@ -7,14 +7,20 @@ from distutils.sysconfig import get_python_lib, get_config_vars
 from distutils.dist import DistributionMetadata
 from distutils.command.install_data import install_data
 from tempfile import TemporaryFile
+import fnmatch
 import io
 import os
 import platform
-from shutil import copy2
+from shutil import copy2, copyfile, rmtree
 import sys
+import tempfile
+import atexit
 
 is_win = platform.system() == "Windows"
 is_mac = platform.system() == "Darwin"
+is_64bit = platform.architecture()[0] == "64bit"
+if is_win:
+    import winreg
 
 def set_extension_compile_args(extension):
     rel_lib_path = extension.name.replace('.', '/')
@@ -22,7 +28,6 @@ def set_extension_compile_args(extension):
     lib_name = abs_lib_path + '.so'
     extension.extra_link_args = [ '-Wl,-install_name,' + lib_name]
 
-
 class smart_install_data(install_data):
     """Replacement for distutils.command.install_data to handle
     configuration files location.
@@ -35,6 +40,48 @@ class smart_install_data(install_data):
         ]
         return install_data.run(self)
 
+def win_get_llvm_reg():
+    REG_PATH = "SOFTWARE\\LLVM\\LLVM"
+    try:
+      return winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, REG_PATH, 0, winreg.KEY_READ | winreg.KEY_WOW64_32KEY)
+    except FileNotFoundError:
+      pass
+    return winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, REG_PATH, 0, winreg.KEY_READ)
+  
+def win_find_clang_path():
+    try:
+        with win_get_llvm_reg() as rkey:
+            return winreg.QueryValueEx(rkey, None)[0]
+    except FileNotFoundError:
+        return None
+
+def win_use_clang():
+    # Recent (>= 8 ?) LLVM versions does not ship anymore a cl.exe binary in
+    # the msbuild-bin directory. Thus, we need to
+    # * copy-paste bin/clang-cl.exe into a temporary directory
+    # * rename it to cl.exe
+    # * add that path first in %Path%
+    # * clean this mess on exit
+    # We could use the build directory created by distutils for this, but it
+    # seems non trivial to gather
+    # (https://stackoverflow.com/questions/12896367/reliable-way-to-get-the-build-directory-from-within-setup-py).
+    clang_path = win_find_clang_path()
+    if clang_path is None:
+        return False
+    tmpdir = tempfile.mkdtemp(prefix="llvm")
+    copyfile(os.path.join(clang_path, "bin", "clang-cl.exe"), os.path.join(tmpdir, "cl.exe"))
+    os.environ['Path'] = "%s;%s" % (tmpdir, os.environ["Path"])
+    atexit.register(lambda dir_: rmtree(dir_), tmpdir)
+
+    return True
+
+win_force_clang = False
+if is_win and is_64bit:
+    # We do not change to clang if under 32 bits, because even with Clang we
+    # don't have uint128_t with the 32 bits ABI.
+    win_force_clang = win_use_clang()
+    if not win_force_clang:
+        print("Warning: couldn't find a Clang/LLVM installation. Some runtime functions needed by the jitter won't be compiled.")
 
 def buil_all():
     packages=[
@@ -153,6 +200,9 @@ def buil_all():
         Extension("miasm.jitter.Jitllvm",
                   ["miasm/jitter/Jitllvm.c",
                    "miasm/jitter/bn.c",
+                   "miasm/runtime/udivmodti4.c",
+                   "miasm/runtime/divti3.c",
+                   "miasm/runtime/udivti3.c"
                   ]),
         Extension("miasm.jitter.Jitgcc",
                   ["miasm/jitter/Jitgcc.c",
@@ -164,6 +214,10 @@ def buil_all():
         # Force setuptools to use whatever msvc version installed
         os.environ['MSSdk'] = '1'
         os.environ['DISTUTILS_USE_SDK'] = '1'
+        if win_force_clang:
+            march = "-m64" if is_64bit else "-m32"
+            for extension in ext_modules_all:
+                extension.extra_compile_args = [march]
     elif is_mac:
         for extension in ext_modules_all:
             set_extension_compile_args(extension)
@@ -248,7 +302,8 @@ def buil_all():
         for lib in libs:
             filename = os.path.basename(lib)
             dst = os.path.join(build_base, lib_dirname, "miasm", "jitter")
-            if filename not in ["VmMngr.lib", "Jitgcc.lib", "Jitllvm.lib"]:
+            # Windows built libraries may have a name like VmMngr.cp38-win_amd64.lib
+            if not any([fnmatch.fnmatch(filename, pattern) for pattern in ["VmMngr.*lib", "Jitgcc.*lib", "Jitllvm.*lib"]]):
                 dst = os.path.join(dst, "arch")
             dst = os.path.join(dst, filename)
             if not os.path.isfile(dst):
diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py
index 5aa619ea..740655da 100644
--- a/test/arch/arm/arch.py
+++ b/test/arch/arm/arch.py
@@ -233,6 +233,15 @@ reg_tests_arm = [
     ('XXXXXXXX    PKHTB      R1, R2, R3 ASR 0x20',
      '531082e6'),
 
+    ('XXXXXXXX    MRC        p15, 0x0, R0, c1, c1, 0x0',
+     '110f11ee'),
+    ('XXXXXXXX    MCR        p15, 0x0, R8, c2, c0, 0x0',
+     '108f02ee'),
+    ('XXXXXXXX    MRCNE      p15, 0x0, R0, c1, c1, 0x0',
+     '110f111e'),
+    ('XXXXXXXX    MCRCC      p15, 0x0, R8, c2, c0, 0x1',
+     '308f023e'),
+
 
 ]
 ts = time.time()
diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py
index e5e8cff6..de6d4547 100644
--- a/test/arch/mips32/arch.py
+++ b/test/arch/mips32/arch.py
@@ -214,6 +214,44 @@ reg_tests_mips32 = [
 
     ("XXXXXXXX    LDC1       F22, 0xFFFF9148(V0)",
      "D4569148"),
+
+    ("XXXXXXXX    BEQL       S0, V0, 0x124",
+     "52020048"),
+    ("XXXXXXXX    BGEZL      T3, 0x24",
+     "05630008"),
+    ("XXXXXXXX    BNEL       A0, ZERO, 0x2C",
+     "5480000A"),
+    ("XXXXXXXX    BLTZL      S6, 0x5C",
+     "06C20016"),
+    ("XXXXXXXX    BLEZL      V1, 0x80",
+     "5860001F"),
+    ("XXXXXXXX    BGTZL      S4, 0x14",
+     "5E800004"),
+    ("XXXXXXXX    BC1FL      FCC0, 0x24",
+     "45020008"),
+    ("XXXXXXXX    BC1TL      FCC0, 0xB8",
+     "4503002D"),
+
+    ("XXXXXXXX    CLZ        K0, K1",
+     "737AD020"),
+
+    ("XXXXXXXX    LL         A0, 0x123(A1)",
+     "C0A40123"),
+    ("XXXXXXXX    SC         A1, 0x123(A0)",
+     "E0850123"),
+
+    ("XXXXXXXX    SYNC       0x19",
+     "0000064F"),
+    ("XXXXXXXX    TLBR       ",
+     "42000001"),
+
+    ("XXXXXXXX    ERET       ",
+     "42000018"),
+
+    ("XXXXXXXX    MTHI       A0",
+     "00800011"),
+    ("XXXXXXXX    MTLO       A1",
+     "00A00013")
 ]
 
 
diff --git a/test/arch/ppc32/arch.py b/test/arch/ppc32/arch.py
index c10a046e..13c69c73 100644
--- a/test/arch/ppc32/arch.py
+++ b/test/arch/ppc32/arch.py
@@ -42,6 +42,7 @@ reg_tests = [
     ('b', "XXXXXXXX    LBZU       R0, 0x1(R31)", "8c1f0001"),
     ('b', "XXXXXXXX    LBZUX      R0, R31, R3", "7c1f18ee"),
     ('b', "XXXXXXXX    LBZX       R0, R30, R31", "7c1ef8ae"),
+    ('b', "XXXXXXXX    LFS        FPR6, 0x1(R1)", "c0c10001"),
     ('b', "XXXXXXXX    LHA        R9, 0x8(R31)", "a93f0008"),
     ('b', "XXXXXXXX    LHAU       R0, 0xFFFFFFFE(R9)", "ac09fffe"),
     ('b', "XXXXXXXX    LHAX       R0, R11, R9", "7c0b4aae"),
@@ -49,10 +50,16 @@ reg_tests = [
     ('b', "XXXXXXXX    LHZX       R0, R9, R10", "7c09522e"),
     ('b', "XXXXXXXX    LMW        R14, 0x8(R1)", "b9c10008"),
     ('b', "XXXXXXXX    LSWI       R5, R4, 0xC", "7ca464aa"),
+    ('b', "XXXXXXXX    LVEWX      VR0, R1, R2", "7c01108e"),
+    ('b', "XXXXXXXX    LVSL       VR0, R1, R2", "7c01100c"),
+    ('b', "XXXXXXXX    LVSR       VR0, R1, R2", "7c01104c"),
     ('b', "XXXXXXXX    LWZ        R0, 0x24(R1)", "80010024"),
     ('b', "XXXXXXXX    LWZU       R0, 0x4(R7)", "84070004"),
     ('b', "XXXXXXXX    LWZX       R29, R25, R0", "7fb9002e"),
     ('b', "XXXXXXXX    MCRF       CR1, CR2", "4c880000"),
+    ('b', "XXXXXXXX    MFFS       FPR23", "fee0048e"),
+    ('b', "XXXXXXXX    MTFSF      0x88, FPR6", "fd10358e"),
+    ('b', "XXXXXXXX    MTVSCR     VR0", "10000644"),
     ('b', "XXXXXXXX    MULLI      R0, R2, 0xFFFFFFE7", "1c02ffe7"),
     ('b', "XXXXXXXX    MULLI      R3, R30, 0xC", "1c7e000c"),
     ('b', "XXXXXXXX    NAND       R0, R0, R0", "7c0003b8"),
@@ -72,6 +79,7 @@ reg_tests = [
     ('b', "XXXXXXXX    SRW        R0, R23, R10", "7ee05430"),
     ('b', "XXXXXXXX    STB        R0, 0x1020(R30)", "981e1020"),
     ('b', "XXXXXXXX    STBU       R0, 0x1(R11)", "9c0b0001"),
+    ('b', "XXXXXXXX    STFS       FPR6, 0x1(R1)", "d0c10001"),
     ('b', "XXXXXXXX    STH        R6, (R3)", "b0c30000"),
     ('b', "XXXXXXXX    STMW       R14, 0x8(R1)", "bdc10008"),
     ('b', "XXXXXXXX    STW        R0, 0x24(R1)", "90010024"),
diff --git a/test/test_all.py b/test/test_all.py
index a8bf5330..71eccc6f 100755
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -17,6 +17,7 @@ from utils import cosmetics, multithread
 from multiprocessing import Queue
 
 is_win = platform.system() == "Windows"
+is_64bit = platform.architecture()[0] == "64bit"
 
 testset = TestSet("../")
 TAGS = {"regression": "REGRESSION", # Regression tests
@@ -112,7 +113,7 @@ for script in ["x86/sem.py",
         if jitter in blacklist.get(script, []):
             continue
         tags = [TAGS[jitter]] if jitter in TAGS else []
-        if is_win and script.endswith("mn_div.py"):
+        if (not is_64bit) and script.endswith("mn_div.py"):
             continue
         testset += ArchUnitTest(script, jitter, base_dir="arch", tags=tags)
 
diff --git a/test/utils/testset.py b/test/utils/testset.py
index eee0e6f7..2bdb7450 100644
--- a/test/utils/testset.py
+++ b/test/utils/testset.py
@@ -203,7 +203,7 @@ class TestSet(object):
             try:
                 os.remove(product)
             except OSError:
-                print("Cleanning error: Unable to remove %s" % product)
+                print("Cleaning error: Unable to remove %s" % product)
 
     def add_additional_args(self, args):
         """Add arguments to used on the test command line