summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rw-r--r--disas/libvixl/README2
-rw-r--r--disas/libvixl/a64/assembler-a64.h363
-rw-r--r--disas/libvixl/a64/constants-a64.h68
-rw-r--r--disas/libvixl/a64/cpu-a64.h27
-rw-r--r--disas/libvixl/a64/decoder-a64.cc15
-rw-r--r--disas/libvixl/a64/decoder-a64.h1
-rw-r--r--disas/libvixl/a64/disasm-a64.cc88
-rw-r--r--disas/libvixl/a64/disasm-a64.h2
-rw-r--r--disas/libvixl/a64/instructions-a64.cc25
-rw-r--r--disas/libvixl/a64/instructions-a64.h10
-rw-r--r--disas/libvixl/platform.h8
-rw-r--r--disas/libvixl/utils.cc10
-rw-r--r--disas/libvixl/utils.h32
-rw-r--r--hw/arm/virt.c2
-rw-r--r--hw/intc/arm_gic.c17
-rw-r--r--hw/intc/arm_gic_common.c2
-rw-r--r--hw/usb/bus.c11
-rw-r--r--hw/usb/hcd-ehci-pci.c14
-rw-r--r--hw/usb/hcd-ehci.c29
-rw-r--r--hw/usb/hcd-ehci.h2
-rw-r--r--hw/usb/hcd-ohci.c27
-rw-r--r--hw/usb/hcd-uhci.c24
-rw-r--r--hw/usb/hcd-xhci.c50
-rw-r--r--include/hw/usb.h1
-rw-r--r--target-arm/cpu.h27
-rw-r--r--target-arm/cpu64.c3
-rw-r--r--target-arm/helper.c138
-rw-r--r--tests/Makefile10
-rw-r--r--tests/usb-hcd-ohci-test.c35
-rw-r--r--tests/usb-hcd-uhci-test.c35
-rw-r--r--tests/usb-hcd-xhci-test.c35
-rw-r--r--trace-events3
33 files changed, 961 insertions, 157 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 59940f97ad..a74c04c2a9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -614,7 +614,7 @@ USB
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
 F: hw/usb/*
-F: tests/usb-hcd-ehci-test.c
+F: tests/usb-*-test.c
 
 VFIO
 M: Alex Williamson <alex.williamson@redhat.com>
diff --git a/disas/libvixl/README b/disas/libvixl/README
index a0ecac3dd3..8301996e68 100644
--- a/disas/libvixl/README
+++ b/disas/libvixl/README
@@ -2,7 +2,7 @@
 The code in this directory is a subset of libvixl:
  https://github.com/armvixl/vixl
 (specifically, it is the set of files needed for disassembly only,
-taken from libvixl 1.4).
+taken from libvixl 1.5).
 Bugfixes should preferably be sent upstream initially.
 
 The disassembler does not currently support the entire A64 instruction
diff --git a/disas/libvixl/a64/assembler-a64.h b/disas/libvixl/a64/assembler-a64.h
index 1e2947b283..cc0b758dd3 100644
--- a/disas/libvixl/a64/assembler-a64.h
+++ b/disas/libvixl/a64/assembler-a64.h
@@ -28,6 +28,7 @@
 #define VIXL_A64_ASSEMBLER_A64_H_
 
 #include <list>
+#include <stack>
 
 #include "globals.h"
 #include "utils.h"
@@ -574,34 +575,107 @@ class MemOperand {
 
 class Label {
  public:
-  Label() : is_bound_(false), link_(NULL), target_(NULL) {}
+  Label() : location_(kLocationUnbound) {}
   ~Label() {
     // If the label has been linked to, it needs to be bound to a target.
     VIXL_ASSERT(!IsLinked() || IsBound());
   }
 
-  inline Instruction* link() const { return link_; }
-  inline Instruction* target() const { return target_; }
+  inline bool IsBound() const { return location_ >= 0; }
+  inline bool IsLinked() const { return !links_.empty(); }
 
-  inline bool IsBound() const { return is_bound_; }
-  inline bool IsLinked() const { return link_ != NULL; }
+ private:
+  // The list of linked instructions is stored in a stack-like structure. We
+  // don't use std::stack directly because it's slow for the common case where
+  // only one or two instructions refer to a label, and labels themselves are
+  // short-lived. This class behaves like std::stack, but the first few links
+  // are preallocated (configured by kPreallocatedLinks).
+  //
+  // If more than N links are required, this falls back to std::stack.
+  class LinksStack {
+   public:
+    LinksStack() : size_(0), links_extended_(NULL) {}
+    ~LinksStack() {
+      delete links_extended_;
+    }
 
-  inline void set_link(Instruction* new_link) { link_ = new_link; }
+    size_t size() const {
+      return size_;
+    }
 
-  static const int kEndOfChain = 0;
+    bool empty() const {
+      return size_ == 0;
+    }
 
- private:
-  // Indicates if the label has been bound, ie its location is fixed.
-  bool is_bound_;
-  // Branches instructions branching to this label form a chained list, with
-  // their offset indicating where the next instruction is located.
-  // link_ points to the latest branch instruction generated branching to this
-  // branch.
-  // If link_ is not NULL, the label has been linked to.
-  Instruction* link_;
+    void push(ptrdiff_t value) {
+      if (size_ < kPreallocatedLinks) {
+        links_[size_] = value;
+      } else {
+        if (links_extended_ == NULL) {
+          links_extended_ = new std::stack<ptrdiff_t>();
+        }
+        VIXL_ASSERT(size_ == (links_extended_->size() + kPreallocatedLinks));
+        links_extended_->push(value);
+      }
+      size_++;
+    }
+
+    ptrdiff_t top() const {
+      return (size_ <= kPreallocatedLinks) ? links_[size_ - 1]
+                                           : links_extended_->top();
+    }
+
+    void pop() {
+      size_--;
+      if (size_ >= kPreallocatedLinks) {
+        links_extended_->pop();
+        VIXL_ASSERT(size_ == (links_extended_->size() + kPreallocatedLinks));
+      }
+    }
+
+   private:
+    static const size_t kPreallocatedLinks = 4;
+
+    size_t size_;
+    ptrdiff_t links_[kPreallocatedLinks];
+    std::stack<ptrdiff_t> * links_extended_;
+  };
+
+  inline ptrdiff_t location() const { return location_; }
+
+  inline void Bind(ptrdiff_t location) {
+    // Labels can only be bound once.
+    VIXL_ASSERT(!IsBound());
+    location_ = location;
+  }
+
+  inline void AddLink(ptrdiff_t instruction) {
+    // If a label is bound, the assembler already has the information it needs
+    // to write the instruction, so there is no need to add it to links_.
+    VIXL_ASSERT(!IsBound());
+    links_.push(instruction);
+  }
+
+  inline ptrdiff_t GetAndRemoveNextLink() {
+    VIXL_ASSERT(IsLinked());
+    ptrdiff_t link = links_.top();
+    links_.pop();
+    return link;
+  }
+
+  // The offsets of the instructions that have linked to this label.
+  LinksStack links_;
   // The label location.
-  Instruction* target_;
+  ptrdiff_t location_;
 
+  static const ptrdiff_t kLocationUnbound = -1;
+
+  // It is not safe to copy labels, so disable the copy constructor by declaring
+  // it private (without an implementation).
+  Label(const Label&);
+
+  // The Assembler class is responsible for binding and linking labels, since
+  // the stored offsets need to be consistent with the Assembler's buffer.
   friend class Assembler;
 };
 
@@ -635,10 +709,49 @@ class Literal {
 };
 
 
+// Control whether or not position-independent code should be emitted.
+enum PositionIndependentCodeOption {
+  // All code generated will be position-independent; all branches and
+  // references to labels generated with the Label class will use PC-relative
+  // addressing.
+  PositionIndependentCode,
+
+  // Allow VIXL to generate code that refers to absolute addresses. With this
+  // option, it will not be possible to copy the code buffer and run it from a
+  // different address; code must be generated in its final location.
+  PositionDependentCode,
+
+  // Allow VIXL to assume that the bottom 12 bits of the address will be
+  // constant, but that the top 48 bits may change. This allows `adrp` to
+  // function in systems which copy code between pages, but otherwise maintain
+  // 4KB page alignment.
+  PageOffsetDependentCode
+};
+
+
+// Control how scaled- and unscaled-offset loads and stores are generated.
+enum LoadStoreScalingOption {
+  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
+  // register-offset, pre-index or post-index instructions if necessary.
+  PreferScaledOffset,
+
+  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
+  // register-offset, pre-index or post-index instructions if necessary.
+  PreferUnscaledOffset,
+
+  // Require scaled-immediate-offset instructions.
+  RequireScaledOffset,
+
+  // Require unscaled-immediate-offset instructions.
+  RequireUnscaledOffset
+};
+
+
 // Assembler.
 class Assembler {
  public:
-  Assembler(byte* buffer, unsigned buffer_size);
+  Assembler(byte* buffer, unsigned buffer_size,
+            PositionIndependentCodeOption pic = PositionIndependentCode);
 
   // The destructor asserts that one of the following is true:
   //  * The Assembler object has not been used.
@@ -662,12 +775,15 @@ class Assembler {
   // Label.
   // Bind a label to the current PC.
   void bind(Label* label);
-  int UpdateAndGetByteOffsetTo(Label* label);
-  inline int UpdateAndGetInstructionOffsetTo(Label* label) {
-    VIXL_ASSERT(Label::kEndOfChain == 0);
-    return UpdateAndGetByteOffsetTo(label) >> kInstructionSizeLog2;
-  }
 
+  // Return the address of a bound label.
+  template <typename T>
+  inline T GetLabelAddress(const Label * label) {
+    VIXL_ASSERT(label->IsBound());
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    VIXL_STATIC_ASSERT(sizeof(*buffer_) == 1);
+    return reinterpret_cast<T>(buffer_ + label->location());
+  }
 
   // Instruction set functions.
 
@@ -733,6 +849,12 @@ class Assembler {
   // Calculate the address of a PC offset.
   void adr(const Register& rd, int imm21);
 
+  // Calculate the page address of a label.
+  void adrp(const Register& rd, Label* label);
+
+  // Calculate the page address of a PC offset.
+  void adrp(const Register& rd, int imm21);
+
   // Data Processing instructions.
   // Add.
   void add(const Register& rd,
@@ -1112,31 +1234,76 @@ class Assembler {
 
   // Memory instructions.
   // Load integer or FP register.
-  void ldr(const CPURegister& rt, const MemOperand& src);
+  void ldr(const CPURegister& rt, const MemOperand& src,
+           LoadStoreScalingOption option = PreferScaledOffset);
 
   // Store integer or FP register.
-  void str(const CPURegister& rt, const MemOperand& dst);
+  void str(const CPURegister& rt, const MemOperand& dst,
+           LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load word with sign extension.
-  void ldrsw(const Register& rt, const MemOperand& src);
+  void ldrsw(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load byte.
-  void ldrb(const Register& rt, const MemOperand& src);
+  void ldrb(const Register& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Store byte.
-  void strb(const Register& rt, const MemOperand& dst);
+  void strb(const Register& rt, const MemOperand& dst,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load byte with sign extension.
-  void ldrsb(const Register& rt, const MemOperand& src);
+  void ldrsb(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load half-word.
-  void ldrh(const Register& rt, const MemOperand& src);
+  void ldrh(const Register& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Store half-word.
-  void strh(const Register& rt, const MemOperand& dst);
+  void strh(const Register& rt, const MemOperand& dst,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load half-word with sign extension.
-  void ldrsh(const Register& rt, const MemOperand& src);
+  void ldrsh(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferScaledOffset);
+
+  // Load integer or FP register (with unscaled offset).
+  void ldur(const CPURegister& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Store integer or FP register (with unscaled offset).
+  void stur(const CPURegister& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load word with sign extension.
+  void ldursw(const Register& rt, const MemOperand& src,
+              LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load byte (with unscaled offset).
+  void ldurb(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Store byte (with unscaled offset).
+  void sturb(const Register& rt, const MemOperand& dst,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load byte with sign extension (and unscaled offset).
+  void ldursb(const Register& rt, const MemOperand& src,
+              LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load half-word (with unscaled offset).
+  void ldurh(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Store half-word (with unscaled offset).
+  void sturh(const Register& rt, const MemOperand& dst,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load half-word with sign extension (and unscaled offset).
+  void ldursh(const Register& rt, const MemOperand& src,
+              LoadStoreScalingOption option = PreferUnscaledOffset);
 
   // Load integer or FP register pair.
   void ldp(const CPURegister& rt, const CPURegister& rt2,
@@ -1166,6 +1333,79 @@ class Assembler {
   // Load single precision floating point literal to FP register.
   void ldr(const FPRegister& ft, float imm);
 
+  // Store exclusive byte.
+  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store exclusive half-word.
+  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store exclusive register.
+  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Load exclusive byte.
+  void ldxrb(const Register& rt, const MemOperand& src);
+
+  // Load exclusive half-word.
+  void ldxrh(const Register& rt, const MemOperand& src);
+
+  // Load exclusive register.
+  void ldxr(const Register& rt, const MemOperand& src);
+
+  // Store exclusive register pair.
+  void stxp(const Register& rs,
+            const Register& rt,
+            const Register& rt2,
+            const MemOperand& dst);
+
+  // Load exclusive register pair.
+  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
+
+  // Store-release exclusive byte.
+  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store-release exclusive half-word.
+  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store-release exclusive register.
+  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Load-acquire exclusive byte.
+  void ldaxrb(const Register& rt, const MemOperand& src);
+
+  // Load-acquire exclusive half-word.
+  void ldaxrh(const Register& rt, const MemOperand& src);
+
+  // Load-acquire exclusive register.
+  void ldaxr(const Register& rt, const MemOperand& src);
+
+  // Store-release exclusive register pair.
+  void stlxp(const Register& rs,
+             const Register& rt,
+             const Register& rt2,
+             const MemOperand& dst);
+
+  // Load-acquire exclusive register pair.
+  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
+
+  // Store-release byte.
+  void stlrb(const Register& rt, const MemOperand& dst);
+
+  // Store-release half-word.
+  void stlrh(const Register& rt, const MemOperand& dst);
+
+  // Store-release register.
+  void stlr(const Register& rt, const MemOperand& dst);
+
+  // Load-acquire byte.
+  void ldarb(const Register& rt, const MemOperand& src);
+
+  // Load-acquire half-word.
+  void ldarh(const Register& rt, const MemOperand& src);
+
+  // Load-acquire register.
+  void ldar(const Register& rt, const MemOperand& src);
+
+
   // Move instructions. The default shift of -1 indicates that the move
   // instruction will calculate an appropriate 16-bit immediate and left shift
   // that is equal to the 64-bit immediate argument. If an explicit left shift
@@ -1214,6 +1454,9 @@ class Assembler {
   // System hint.
   void hint(SystemHint code);
 
+  // Clear exclusive monitor.
+  void clrex(int imm4 = 0xf);
+
   // Data memory barrier.
   void dmb(BarrierDomain domain, BarrierType type);
 
@@ -1429,6 +1672,11 @@ class Assembler {
     return rt2.code() << Rt2_offset;
   }
 
+  static Instr Rs(CPURegister rs) {
+    VIXL_ASSERT(rs.code() != kSPRegInternalCode);
+    return rs.code() << Rs_offset;
+  }
+
   // These encoding functions allow the stack pointer to be encoded, and
   // disallow the zero register.
   static Instr RdSP(Register rd) {
@@ -1619,6 +1867,11 @@ class Assembler {
     return imm7 << ImmHint_offset;
   }
 
+  static Instr CRm(int imm4) {
+    VIXL_ASSERT(is_uint4(imm4));
+    return imm4 << CRm_offset;
+  }
+
   static Instr ImmBarrierDomain(int imm2) {
     VIXL_ASSERT(is_uint2(imm2));
     return imm2 << ImmBarrierDomain_offset;
@@ -1660,16 +1913,20 @@ class Assembler {
   }
 
   // Size of the code generated in bytes
-  uint64_t SizeOfCodeGenerated() const {
+  size_t SizeOfCodeGenerated() const {
     VIXL_ASSERT((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
     return pc_ - buffer_;
   }
 
   // Size of the code generated since label to the current position.
-  uint64_t SizeOfCodeGeneratedSince(Label* label) const {
+  size_t SizeOfCodeGeneratedSince(Label* label) const {
+    size_t pc_offset = SizeOfCodeGenerated();
+
     VIXL_ASSERT(label->IsBound());
-    VIXL_ASSERT((pc_ >= label->target()) && (pc_ < (buffer_ + buffer_size_)));
-    return pc_ - label->target();
+    VIXL_ASSERT(pc_offset >= static_cast<size_t>(label->location()));
+    VIXL_ASSERT(pc_offset < buffer_size_);
+
+    return pc_offset - label->location();
   }
 
 
@@ -1693,6 +1950,15 @@ class Assembler {
   void EmitLiteralPool(LiteralPoolEmitOption option = NoJumpRequired);
   size_t LiteralPoolSize();
 
+  inline PositionIndependentCodeOption pic() {
+    return pic_;
+  }
+
+  inline bool AllowPageOffsetDependentCode() {
+    return (pic() == PageOffsetDependentCode) ||
+           (pic() == PositionDependentCode);
+  }
+
  protected:
   inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const {
     return reg.Is64Bits() ? xzr : wzr;
@@ -1701,7 +1967,8 @@ class Assembler {
 
   void LoadStore(const CPURegister& rt,
                  const MemOperand& addr,
-                 LoadStoreOp op);
+                 LoadStoreOp op,
+                 LoadStoreScalingOption option = PreferScaledOffset);
   static bool IsImmLSUnscaled(ptrdiff_t offset);
   static bool IsImmLSScaled(ptrdiff_t offset, LSDataSize size);
 
@@ -1717,9 +1984,9 @@ class Assembler {
                         LogicalOp op);
   static bool IsImmLogical(uint64_t value,
                            unsigned width,
-                           unsigned* n,
-                           unsigned* imm_s,
-                           unsigned* imm_r);
+                           unsigned* n = NULL,
+                           unsigned* imm_s = NULL,
+                           unsigned* imm_r = NULL);
 
   void ConditionalCompare(const Register& rn,
                           const Operand& operand,
@@ -1823,6 +2090,17 @@ class Assembler {
 
   void RecordLiteral(int64_t imm, unsigned size);
 
+  // Link the current (not-yet-emitted) instruction to the specified label, then
+  // return an offset to be encoded in the instruction. If the label is not yet
+  // bound, an offset of 0 is returned.
+  ptrdiff_t LinkAndGetByteOffsetTo(Label * label);
+  ptrdiff_t LinkAndGetInstructionOffsetTo(Label * label);
+  ptrdiff_t LinkAndGetPageOffsetTo(Label * label);
+
+  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
+  template <int element_size>
+  ptrdiff_t LinkAndGetOffsetTo(Label* label);
+
   // Emit the instruction at pc_.
   void Emit(Instr instruction) {
     VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
@@ -1864,12 +2142,15 @@ class Assembler {
   // The buffer into which code and relocation info are generated.
   Instruction* buffer_;
   // Buffer size, in bytes.
-  unsigned buffer_size_;
+  size_t buffer_size_;
   Instruction* pc_;
   std::list<Literal*> literals_;
   Instruction* next_literal_pool_check_;
   unsigned literal_pool_monitor_;
 
+  PositionIndependentCodeOption pic_;
+
+  friend class Label;
   friend class BlockLiteralPoolScope;
 
 #ifdef DEBUG
diff --git a/disas/libvixl/a64/constants-a64.h b/disas/libvixl/a64/constants-a64.h
index 99677c1be3..7a14f85f59 100644
--- a/disas/libvixl/a64/constants-a64.h
+++ b/disas/libvixl/a64/constants-a64.h
@@ -46,13 +46,13 @@ R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
 
 #define INSTRUCTION_FIELDS_LIST(V_)                                            \
 /* Register fields */                                                          \
-V_(Rd, 4, 0, Bits)                        /* Destination register.     */      \
-V_(Rn, 9, 5, Bits)                        /* First source register.    */      \
-V_(Rm, 20, 16, Bits)                      /* Second source register.   */      \
-V_(Ra, 14, 10, Bits)                      /* Third source register.    */      \
-V_(Rt, 4, 0, Bits)                        /* Load dest / store source. */      \
-V_(Rt2, 14, 10, Bits)                     /* Load second dest /        */      \
-                                         /* store second source.      */       \
+V_(Rd, 4, 0, Bits)                        /* Destination register.        */   \
+V_(Rn, 9, 5, Bits)                        /* First source register.       */   \
+V_(Rm, 20, 16, Bits)                      /* Second source register.      */   \
+V_(Ra, 14, 10, Bits)                      /* Third source register.       */   \
+V_(Rt, 4, 0, Bits)                        /* Load/store register.         */   \
+V_(Rt2, 14, 10, Bits)                     /* Load/store second register.  */   \
+V_(Rs, 20, 16, Bits)                      /* Exclusive access status.     */   \
 V_(PrefetchMode, 4, 0, Bits)                                                   \
                                                                                \
 /* Common bits */                                                              \
@@ -126,6 +126,13 @@ V_(SysOp1, 18, 16, Bits)                                                       \
 V_(SysOp2, 7, 5, Bits)                                                         \
 V_(CRn, 15, 12, Bits)                                                          \
 V_(CRm, 11, 8, Bits)                                                           \
+                                                                               \
+/* Load-/store-exclusive */                                                    \
+V_(LdStXLoad, 22, 22, Bits)                                                    \
+V_(LdStXNotExclusive, 23, 23, Bits)                                            \
+V_(LdStXAcquireRelease, 15, 15, Bits)                                          \
+V_(LdStXSizeLog2, 31, 30, Bits)                                                \
+V_(LdStXPair, 21, 21, Bits)                                                    \
 
 
 #define SYSTEM_REGISTER_FIELDS_LIST(V_, M_)                                    \
@@ -585,6 +592,13 @@ enum MemBarrierOp {
   ISB             = MemBarrierFixed | 0x00000040
 };
 
+enum SystemExclusiveMonitorOp {
+  SystemExclusiveMonitorFixed = 0xD503305F,
+  SystemExclusiveMonitorFMask = 0xFFFFF0FF,
+  SystemExclusiveMonitorMask  = 0xFFFFF0FF,
+  CLREX                       = SystemExclusiveMonitorFixed
+};
+
 // Any load or store.
 enum LoadStoreAnyOp {
   LoadStoreAnyFMask = 0x0a000000,
@@ -702,7 +716,7 @@ enum LoadStoreUnscaledOffsetOp {
 
 // Load/store (post, pre, offset and unsigned.)
 enum LoadStoreOp {
-  LoadStoreOpMask   = 0xC4C00000,
+  LoadStoreOpMask = 0xC4C00000,
   #define LOAD_STORE(A, B, C, D)  \
   A##B##_##C = D
   LOAD_STORE_OP_LIST(LOAD_STORE),
@@ -756,6 +770,44 @@ enum LoadStoreRegisterOffset {
   #undef LOAD_STORE_REGISTER_OFFSET
 };
 
+enum LoadStoreExclusive {
+  LoadStoreExclusiveFixed = 0x08000000,
+  LoadStoreExclusiveFMask = 0x3F000000,
+  LoadStoreExclusiveMask  = 0xFFE08000,
+  STXRB_w  = LoadStoreExclusiveFixed | 0x00000000,
+  STXRH_w  = LoadStoreExclusiveFixed | 0x40000000,
+  STXR_w   = LoadStoreExclusiveFixed | 0x80000000,
+  STXR_x   = LoadStoreExclusiveFixed | 0xC0000000,
+  LDXRB_w  = LoadStoreExclusiveFixed | 0x00400000,
+  LDXRH_w  = LoadStoreExclusiveFixed | 0x40400000,
+  LDXR_w   = LoadStoreExclusiveFixed | 0x80400000,
+  LDXR_x   = LoadStoreExclusiveFixed | 0xC0400000,
+  STXP_w   = LoadStoreExclusiveFixed | 0x80200000,
+  STXP_x   = LoadStoreExclusiveFixed | 0xC0200000,
+  LDXP_w   = LoadStoreExclusiveFixed | 0x80600000,
+  LDXP_x   = LoadStoreExclusiveFixed | 0xC0600000,
+  STLXRB_w = LoadStoreExclusiveFixed | 0x00008000,
+  STLXRH_w = LoadStoreExclusiveFixed | 0x40008000,
+  STLXR_w  = LoadStoreExclusiveFixed | 0x80008000,
+  STLXR_x  = LoadStoreExclusiveFixed | 0xC0008000,
+  LDAXRB_w = LoadStoreExclusiveFixed | 0x00408000,
+  LDAXRH_w = LoadStoreExclusiveFixed | 0x40408000,
+  LDAXR_w  = LoadStoreExclusiveFixed | 0x80408000,
+  LDAXR_x  = LoadStoreExclusiveFixed | 0xC0408000,
+  STLXP_w  = LoadStoreExclusiveFixed | 0x80208000,
+  STLXP_x  = LoadStoreExclusiveFixed | 0xC0208000,
+  LDAXP_w  = LoadStoreExclusiveFixed | 0x80608000,
+  LDAXP_x  = LoadStoreExclusiveFixed | 0xC0608000,
+  STLRB_w  = LoadStoreExclusiveFixed | 0x00808000,
+  STLRH_w  = LoadStoreExclusiveFixed | 0x40808000,
+  STLR_w   = LoadStoreExclusiveFixed | 0x80808000,
+  STLR_x   = LoadStoreExclusiveFixed | 0xC0808000,
+  LDARB_w  = LoadStoreExclusiveFixed | 0x00C08000,
+  LDARH_w  = LoadStoreExclusiveFixed | 0x40C08000,
+  LDAR_w   = LoadStoreExclusiveFixed | 0x80C08000,
+  LDAR_x   = LoadStoreExclusiveFixed | 0xC0C08000
+};
+
 // Conditional compare.
 enum ConditionalCompareOp {
   ConditionalCompareMask = 0x60000000,
diff --git a/disas/libvixl/a64/cpu-a64.h b/disas/libvixl/a64/cpu-a64.h
index dfd8f015cf..59b7974a19 100644
--- a/disas/libvixl/a64/cpu-a64.h
+++ b/disas/libvixl/a64/cpu-a64.h
@@ -28,6 +28,7 @@
 #define VIXL_CPU_A64_H
 
 #include "globals.h"
+#include "instructions-a64.h"
 
 namespace vixl {
 
@@ -42,6 +43,32 @@ class CPU {
   // safely run.
   static void EnsureIAndDCacheCoherency(void *address, size_t length);
 
+  // Handle tagged pointers.
+  template <typename T>
+  static T SetPointerTag(T pointer, uint64_t tag) {
+    VIXL_ASSERT(is_uintn(kAddressTagWidth, tag));
+
+    // Use C-style casts to get static_cast behaviour for integral types (T),
+    // and reinterpret_cast behaviour for other types.
+
+    uint64_t raw = (uint64_t)pointer;
+    VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+    raw = (raw & ~kAddressTagMask) | (tag << kAddressTagOffset);
+    return (T)raw;
+  }
+
+  template <typename T>
+  static uint64_t GetPointerTag(T pointer) {
+    // Use C-style casts to get static_cast behaviour for integral types (T),
+    // and reinterpret_cast behaviour for other types.
+
+    uint64_t raw = (uint64_t)pointer;
+    VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+    return (raw & kAddressTagMask) >> kAddressTagOffset;
+  }
+
  private:
   // Return the content of the cache type register.
   static uint32_t GetCacheType();
diff --git a/disas/libvixl/a64/decoder-a64.cc b/disas/libvixl/a64/decoder-a64.cc
index 8450eb3b49..5831b73024 100644
--- a/disas/libvixl/a64/decoder-a64.cc
+++ b/disas/libvixl/a64/decoder-a64.cc
@@ -171,9 +171,9 @@ void Decoder::DecodePCRelAddressing(Instruction* instr) {
 
 void Decoder::DecodeBranchSystemException(Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
-         (instr->Bits(27, 24) == 0x5) ||
-         (instr->Bits(27, 24) == 0x6) ||
-         (instr->Bits(27, 24) == 0x7) );
+              (instr->Bits(27, 24) == 0x5) ||
+              (instr->Bits(27, 24) == 0x6) ||
+              (instr->Bits(27, 24) == 0x7) );
 
   switch (instr->Bits(31, 29)) {
     case 0:
@@ -272,16 +272,15 @@ void Decoder::DecodeBranchSystemException(Instruction* instr) {
 
 void Decoder::DecodeLoadStore(Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
-         (instr->Bits(27, 24) == 0x9) ||
-         (instr->Bits(27, 24) == 0xC) ||
-         (instr->Bits(27, 24) == 0xD) );
+              (instr->Bits(27, 24) == 0x9) ||
+              (instr->Bits(27, 24) == 0xC) ||
+              (instr->Bits(27, 24) == 0xD) );
 
   if (instr->Bit(24) == 0) {
     if (instr->Bit(28) == 0) {
       if (instr->Bit(29) == 0) {
         if (instr->Bit(26) == 0) {
-          // TODO: VisitLoadStoreExclusive.
-          VisitUnimplemented(instr);
+          VisitLoadStoreExclusive(instr);
         } else {
           DecodeAdvSIMDLoadStore(instr);
         }
diff --git a/disas/libvixl/a64/decoder-a64.h b/disas/libvixl/a64/decoder-a64.h
index bbbbd81247..72c15196ce 100644
--- a/disas/libvixl/a64/decoder-a64.h
+++ b/disas/libvixl/a64/decoder-a64.h
@@ -59,6 +59,7 @@
   V(LoadStorePreIndex)              \
   V(LoadStoreRegisterOffset)        \
   V(LoadStoreUnsignedOffset)        \
+  V(LoadStoreExclusive)             \
   V(LogicalShifted)                 \
   V(AddSubShifted)                  \
   V(AddSubExtended)                 \
diff --git a/disas/libvixl/a64/disasm-a64.cc b/disas/libvixl/a64/disasm-a64.cc
index f81ce4bbb8..248ebfd436 100644
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
@@ -24,6 +24,7 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include <cstdlib>
 #include "a64/disasm-a64.h"
 
 namespace vixl {
@@ -529,7 +530,7 @@ void Disassembler::VisitExtract(Instruction* instr) {
 void Disassembler::VisitPCRelAddressing(Instruction* instr) {
   switch (instr->Mask(PCRelAddressingMask)) {
     case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break;
-    // ADRP is not implemented.
+    case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break;
     default: Format(instr, "unimplemented", "(PCRelAddressing)");
   }
 }
@@ -943,6 +944,49 @@ void Disassembler::VisitLoadStorePairNonTemporal(Instruction* instr) {
 }
 
 
+void Disassembler::VisitLoadStoreExclusive(Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form;
+
+  switch (instr->Mask(LoadStoreExclusiveMask)) {
+    case STXRB_w: mnemonic = "stxrb"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STXRH_w: mnemonic = "stxrh"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STXR_w: mnemonic = "stxr"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STXR_x: mnemonic = "stxr"; form = "'Ws, 'Xt, ['Xns]"; break;
+    case LDXRB_w: mnemonic = "ldxrb"; form = "'Wt, ['Xns]"; break;
+    case LDXRH_w: mnemonic = "ldxrh"; form = "'Wt, ['Xns]"; break;
+    case LDXR_w: mnemonic = "ldxr"; form = "'Wt, ['Xns]"; break;
+    case LDXR_x: mnemonic = "ldxr"; form = "'Xt, ['Xns]"; break;
+    case STXP_w: mnemonic = "stxp"; form = "'Ws, 'Wt, 'Wt2, ['Xns]"; break;
+    case STXP_x: mnemonic = "stxp"; form = "'Ws, 'Xt, 'Xt2, ['Xns]"; break;
+    case LDXP_w: mnemonic = "ldxp"; form = "'Wt, 'Wt2, ['Xns]"; break;
+    case LDXP_x: mnemonic = "ldxp"; form = "'Xt, 'Xt2, ['Xns]"; break;
+    case STLXRB_w: mnemonic = "stlxrb"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STLXRH_w: mnemonic = "stlxrh"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STLXR_w: mnemonic = "stlxr"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STLXR_x: mnemonic = "stlxr"; form = "'Ws, 'Xt, ['Xns]"; break;
+    case LDAXRB_w: mnemonic = "ldaxrb"; form = "'Wt, ['Xns]"; break;
+    case LDAXRH_w: mnemonic = "ldaxrh"; form = "'Wt, ['Xns]"; break;
+    case LDAXR_w: mnemonic = "ldaxr"; form = "'Wt, ['Xns]"; break;
+    case LDAXR_x: mnemonic = "ldaxr"; form = "'Xt, ['Xns]"; break;
+    case STLXP_w: mnemonic = "stlxp"; form = "'Ws, 'Wt, 'Wt2, ['Xns]"; break;
+    case STLXP_x: mnemonic = "stlxp"; form = "'Ws, 'Xt, 'Xt2, ['Xns]"; break;
+    case LDAXP_w: mnemonic = "ldaxp"; form = "'Wt, 'Wt2, ['Xns]"; break;
+    case LDAXP_x: mnemonic = "ldaxp"; form = "'Xt, 'Xt2, ['Xns]"; break;
+    case STLRB_w: mnemonic = "stlrb"; form = "'Wt, ['Xns]"; break;
+    case STLRH_w: mnemonic = "stlrh"; form = "'Wt, ['Xns]"; break;
+    case STLR_w: mnemonic = "stlr"; form = "'Wt, ['Xns]"; break;
+    case STLR_x: mnemonic = "stlr"; form = "'Xt, ['Xns]"; break;
+    case LDARB_w: mnemonic = "ldarb"; form = "'Wt, ['Xns]"; break;
+    case LDARH_w: mnemonic = "ldarh"; form = "'Wt, ['Xns]"; break;
+    case LDAR_w: mnemonic = "ldar"; form = "'Wt, ['Xns]"; break;
+    case LDAR_x: mnemonic = "ldar"; form = "'Xt, ['Xns]"; break;
+    default: form = "(LoadStoreExclusive)";
+  }
+  Format(instr, mnemonic, form);
+}
+
+
 void Disassembler::VisitFPCompare(Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Fn, 'Fm";
@@ -1162,7 +1206,15 @@ void Disassembler::VisitSystem(Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(System)";
 
-  if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
+  if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
+    switch (instr->Mask(SystemExclusiveMonitorMask)) {
+      case CLREX: {
+        mnemonic = "clrex";
+        form = (instr->CRm() == 0xf) ? NULL : "'IX";
+        break;
+      }
+    }
+  } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
     switch (instr->Mask(SystemSysRegMask)) {
       case MRS: {
         mnemonic = "mrs";
@@ -1184,7 +1236,6 @@ void Disassembler::VisitSystem(Instruction* instr) {
       }
     }
   } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
-    VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
     switch (instr->ImmHint()) {
       case NOP: {
         mnemonic = "nop";
@@ -1312,6 +1363,7 @@ int Disassembler::SubstituteRegisterField(Instruction* instr,
     case 'n': reg_num = instr->Rn(); break;
     case 'm': reg_num = instr->Rm(); break;
     case 'a': reg_num = instr->Ra(); break;
+    case 's': reg_num = instr->Rs(); break;
     case 't': {
       if (format[2] == '2') {
         reg_num = instr->Rt2();
@@ -1458,6 +1510,10 @@ int Disassembler::SubstituteImmediateField(Instruction* instr,
       AppendToOutput("#0x%" PRIx64, instr->ImmException());
       return 6;
     }
+    case 'X': {  // IX - CLREX instruction.
+      AppendToOutput("#0x%" PRIx64, instr->CRm());
+      return 2;
+    }
     default: {
       VIXL_UNIMPLEMENTED();
       return 0;
@@ -1564,21 +1620,20 @@ int Disassembler::SubstituteConditionField(Instruction* instr,
 
 int Disassembler::SubstitutePCRelAddressField(Instruction* instr,
                                               const char* format) {
-  USE(format);
-  VIXL_ASSERT(strncmp(format, "AddrPCRel", 9) == 0);
+  VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) ||   // Used by `adr`.
+              (strcmp(format, "AddrPCRelPage") == 0));    // Used by `adrp`.
 
-  int offset = instr->ImmPCRel();
+  int64_t offset = instr->ImmPCRel();
+  Instruction * base = instr;
 
-  // Only ADR (AddrPCRelByte) is supported.
-  VIXL_ASSERT(strcmp(format, "AddrPCRelByte") == 0);
-
-  char sign = '+';
-  if (offset < 0) {
-    offset = -offset;
-    sign = '-';
+  if (format[9] == 'P') {
+    offset *= kPageSize;
+    base = AlignDown(base, kPageSize);
   }
-  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  AppendToOutput("#%c0x%x (addr %p)", sign, offset, instr + offset);
+
+  char sign = (offset < 0) ? '-' : '+';
+  void * target = reinterpret_cast<void *>(base + offset);
+  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, std::abs(offset), target);
   return 13;
 }
 
@@ -1606,7 +1661,8 @@ int Disassembler::SubstituteBranchTargetField(Instruction* instr,
     sign = '-';
   }
   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, instr + offset);
+  void * address = reinterpret_cast<void *>(instr + offset);
+  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, address);
   return 8;
 }
 
diff --git a/disas/libvixl/a64/disasm-a64.h b/disas/libvixl/a64/disasm-a64.h
index 3a56e15515..06ee43fdeb 100644
--- a/disas/libvixl/a64/disasm-a64.h
+++ b/disas/libvixl/a64/disasm-a64.h
@@ -85,7 +85,7 @@ class Disassembler: public DecoderVisitor {
   bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
 
   void ResetOutput();
-  void AppendToOutput(const char* string, ...);
+  void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
 
   char* buffer_;
   uint32_t buffer_pos_;
diff --git a/disas/libvixl/a64/instructions-a64.cc b/disas/libvixl/a64/instructions-a64.cc
index c4eb7c4518..e9caceb37b 100644
--- a/disas/libvixl/a64/instructions-a64.cc
+++ b/disas/libvixl/a64/instructions-a64.cc
@@ -149,17 +149,24 @@ LSDataSize CalcLSPairDataSize(LoadStorePairOp op) {
 
 
 Instruction* Instruction::ImmPCOffsetTarget() {
+  Instruction * base = this;
   ptrdiff_t offset;
   if (IsPCRelAddressing()) {
-    // PC-relative addressing. Only ADR is supported.
+    // ADR and ADRP.
     offset = ImmPCRel();
+    if (Mask(PCRelAddressingMask) == ADRP) {
+      base = AlignDown(base, kPageSize);
+      offset *= kPageSize;
+    } else {
+      VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR);
+    }
   } else {
     // All PC-relative branches.
     VIXL_ASSERT(BranchType() != UnknownBranchType);
     // Relative branch offsets are instruction-size-aligned.
     offset = ImmBranch() << kInstructionSizeLog2;
   }
-  return this + offset;
+  return base + offset;
 }
 
 
@@ -185,10 +192,16 @@ void Instruction::SetImmPCOffsetTarget(Instruction* target) {
 
 
 void Instruction::SetPCRelImmTarget(Instruction* target) {
-  // ADRP is not supported, so 'this' must point to an ADR instruction.
-  VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR);
-
-  Instr imm = Assembler::ImmPCRelAddress(target - this);
+  int32_t imm21;
+  if ((Mask(PCRelAddressingMask) == ADR)) {
+    imm21 = target - this;
+  } else {
+    VIXL_ASSERT(Mask(PCRelAddressingMask) == ADRP);
+    uintptr_t this_page = reinterpret_cast<uintptr_t>(this) / kPageSize;
+    uintptr_t target_page = reinterpret_cast<uintptr_t>(target) / kPageSize;
+    imm21 = target_page - this_page;
+  }
+  Instr imm = Assembler::ImmPCRelAddress(imm21);
 
   SetInstructionBits(Mask(~ImmPCRel_mask) | imm);
 }
diff --git a/disas/libvixl/a64/instructions-a64.h b/disas/libvixl/a64/instructions-a64.h
index a4240d7d33..d5b90c52e1 100644
--- a/disas/libvixl/a64/instructions-a64.h
+++ b/disas/libvixl/a64/instructions-a64.h
@@ -41,6 +41,10 @@ const unsigned kLiteralEntrySize = 4;
 const unsigned kLiteralEntrySizeLog2 = 2;
 const unsigned kMaxLoadLiteralRange = 1 * MBytes;
 
+// This is the nominal page size (as used by the adrp instruction); the actual
+// size of the memory pages allocated by the kernel is likely to differ.
+const unsigned kPageSize = 4 * KBytes;
+
 const unsigned kWRegSize = 32;
 const unsigned kWRegSizeLog2 = 5;
 const unsigned kWRegSizeInBytes = kWRegSize / 8;
@@ -79,6 +83,12 @@ const unsigned kZeroRegCode = 31;
 const unsigned kSPRegInternalCode = 63;
 const unsigned kRegCodeMask = 0x1f;
 
+const unsigned kAddressTagOffset = 56;
+const unsigned kAddressTagWidth = 8;
+const uint64_t kAddressTagMask =
+    ((UINT64_C(1) << kAddressTagWidth) - 1) << kAddressTagOffset;
+VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
+
 // AArch64 floating-point specifics. These match IEEE-754.
 const unsigned kDoubleMantissaBits = 52;
 const unsigned kDoubleExponentBits = 11;
diff --git a/disas/libvixl/platform.h b/disas/libvixl/platform.h
index b5c2085b90..de2b110cca 100644
--- a/disas/libvixl/platform.h
+++ b/disas/libvixl/platform.h
@@ -28,14 +28,10 @@
 #define PLATFORM_H
 
 // Define platform specific functionalities.
+#include <signal.h>
 
 namespace vixl {
-#ifdef USE_SIMULATOR
-// Currently we assume running the simulator implies running on x86 hardware.
-inline void HostBreakpoint() { asm("int3"); }
-#else
-inline void HostBreakpoint() { asm("brk"); }
-#endif
+inline void HostBreakpoint() { raise(SIGINT); }
 }  // namespace vixl
 
 #endif
diff --git a/disas/libvixl/utils.cc b/disas/libvixl/utils.cc
index c9c05d1e18..4d4fcbdad4 100644
--- a/disas/libvixl/utils.cc
+++ b/disas/libvixl/utils.cc
@@ -124,4 +124,14 @@ int CountSetBits(uint64_t value, int width) {
 
   return value;
 }
+
+
+uint64_t LowestSetBit(uint64_t value) {
+  return value & -value;
+}
+
+
+bool IsPowerOf2(int64_t value) {
+  return (value != 0) && ((value & (value - 1)) == 0);
+}
 }  // namespace vixl
diff --git a/disas/libvixl/utils.h b/disas/libvixl/utils.h
index 83c928c8e3..b472f0e6cd 100644
--- a/disas/libvixl/utils.h
+++ b/disas/libvixl/utils.h
@@ -33,6 +33,14 @@
 
 namespace vixl {
 
+// Macros for compile-time format checking.
+#if defined(__GNUC__)
+#define PRINTF_CHECK(format_index, varargs_index) \
+  __attribute__((format(printf, format_index, varargs_index)))
+#else
+#define PRINTF_CHECK(format_index, varargs_index)
+#endif
+
 // Check number width.
 inline bool is_intn(unsigned n, int64_t x) {
   VIXL_ASSERT((0 < n) && (n < 64));
@@ -155,6 +163,8 @@ int CountLeadingZeros(uint64_t value, int width);
 int CountLeadingSignBits(int64_t value, int width);
 int CountTrailingZeros(uint64_t value, int width);
 int CountSetBits(uint64_t value, int width);
+uint64_t LowestSetBit(uint64_t value);
+bool IsPowerOf2(int64_t value);
 
 // Pointer alignment
 // TODO: rename/refactor to make it specific to instructions.
@@ -167,21 +177,31 @@ bool IsWordAligned(T pointer) {
 // Increment a pointer until it has the specified alignment.
 template<class T>
 T AlignUp(T pointer, size_t alignment) {
-  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(uintptr_t));
-  uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
+  // Use C-style casts to get static_cast behaviour for integral types (T), and
+  // reinterpret_cast behaviour for other types.
+
+  uintptr_t pointer_raw = (uintptr_t)pointer;
+  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw));
+
   size_t align_step = (alignment - pointer_raw) % alignment;
   VIXL_ASSERT((pointer_raw + align_step) % alignment == 0);
-  return reinterpret_cast<T>(pointer_raw + align_step);
+
+  return (T)(pointer_raw + align_step);
 }
 
 // Decrement a pointer until it has the specified alignment.
 template<class T>
 T AlignDown(T pointer, size_t alignment) {
-  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(uintptr_t));
-  uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
+  // Use C-style casts to get static_cast behaviour for integral types (T), and
+  // reinterpret_cast behaviour for other types.
+
+  uintptr_t pointer_raw = (uintptr_t)pointer;
+  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw));
+
   size_t align_step = pointer_raw % alignment;
   VIXL_ASSERT((pointer_raw - align_step) % alignment == 0);
-  return reinterpret_cast<T>(pointer_raw - align_step);
+
+  return (T)(pointer_raw - align_step);
 }
 
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index bd206a019a..d6fffc75bd 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -541,7 +541,7 @@ static QEMUMachine machvirt_a15_machine = {
     .name = "virt",
     .desc = "ARM Virtual Machine",
     .init = machvirt_init,
-    .max_cpus = 4,
+    .max_cpus = 8,
 };
 
 static void machvirt_machine_init(void)
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 1532ef9482..db9110c1c4 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -66,7 +66,8 @@ void gic_update(GICState *s)
         best_prio = 0x100;
         best_irq = 1023;
         for (irq = 0; irq < s->num_irq; irq++) {
-            if (GIC_TEST_ENABLED(irq, cm) && gic_test_pending(s, irq, cm)) {
+            if (GIC_TEST_ENABLED(irq, cm) && gic_test_pending(s, irq, cm) &&
+                (irq < GIC_INTERNAL || GIC_TARGET(irq) & cm)) {
                 if (GIC_GET_PRIORITY(irq, cpu) < best_prio) {
                     best_prio = GIC_GET_PRIORITY(irq, cpu);
                     best_irq = irq;
@@ -372,7 +373,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset)
         }
     } else if (offset < 0xf00) {
         /* Interrupt Configuration.  */
-        irq = (offset - 0xc00) * 2 + GIC_BASE_IRQ;
+        irq = (offset - 0xc00) * 4 + GIC_BASE_IRQ;
         if (irq >= s->num_irq)
             goto bad_reg;
         res = 0;
@@ -558,13 +559,15 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         irq = (offset - 0xc00) * 4 + GIC_BASE_IRQ;
         if (irq >= s->num_irq)
             goto bad_reg;
-        if (irq < GIC_INTERNAL)
+        if (irq < GIC_NR_SGIS)
             value |= 0xaa;
         for (i = 0; i < 4; i++) {
-            if (value & (1 << (i * 2))) {
-                GIC_SET_MODEL(irq + i);
-            } else {
-                GIC_CLEAR_MODEL(irq + i);
+            if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+                if (value & (1 << (i * 2))) {
+                    GIC_SET_MODEL(irq + i);
+                } else {
+                    GIC_CLEAR_MODEL(irq + i);
+                }
             }
             if (value & (2 << (i * 2))) {
                 GIC_SET_EDGE_TRIGGER(irq + i);
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 6d884eca3b..18b01ba0c7 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -128,7 +128,7 @@ static void arm_gic_common_reset(DeviceState *dev)
         s->running_priority[i] = 0x100;
         s->cpu_enabled[i] = false;
     }
-    for (i = 0; i < 16; i++) {
+    for (i = 0; i < GIC_NR_SGIS; i++) {
         GIC_SET_ENABLED(i, ALL_CPU_MASK);
         GIC_SET_EDGE_TRIGGER(i);
     }
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index 927a47bbff..c7c4dadedd 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -87,6 +87,13 @@ void usb_bus_new(USBBus *bus, size_t bus_size,
     QTAILQ_INSERT_TAIL(&busses, bus, next);
 }
 
+void usb_bus_release(USBBus *bus)
+{
+    assert(next_usb_bus > 0);
+
+    QTAILQ_REMOVE(&busses, bus, next);
+}
+
 USBBus *usb_bus_find(int busnr)
 {
     USBBus *bus;
@@ -589,11 +596,11 @@ static char *usb_get_fw_dev_path(DeviceState *qdev)
         nr = strtol(in, &in, 10);
         if (in[0] == '.') {
             /* some hub between root port and device */
-            pos += snprintf(fw_path + pos, fw_len - pos, "hub@%ld/", nr);
+            pos += snprintf(fw_path + pos, fw_len - pos, "hub@%lx/", nr);
             in++;
         } else {
             /* the device itself */
-            pos += snprintf(fw_path + pos, fw_len - pos, "%s@%ld",
+            pos += snprintf(fw_path + pos, fw_len - pos, "%s@%lx",
                             qdev_fw_name(qdev), nr);
             break;
         }
diff --git a/hw/usb/hcd-ehci-pci.c b/hw/usb/hcd-ehci-pci.c
index 505741a783..289ca3b853 100644
--- a/hw/usb/hcd-ehci-pci.c
+++ b/hw/usb/hcd-ehci-pci.c
@@ -84,6 +84,19 @@ static void usb_ehci_pci_init(Object *obj)
     usb_ehci_init(s, DEVICE(obj));
 }
 
+static void usb_ehci_pci_exit(PCIDevice *dev)
+{
+    EHCIPCIState *i = PCI_EHCI(dev);
+    EHCIState *s = &i->ehci;
+
+    usb_ehci_unrealize(s, DEVICE(dev), NULL);
+
+    if (s->irq) {
+        g_free(s->irq);
+        s->irq = NULL;
+    }
+}
+
 static void usb_ehci_pci_write_config(PCIDevice *dev, uint32_t addr,
                                       uint32_t val, int l)
 {
@@ -121,6 +134,7 @@ static void ehci_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->init = usb_ehci_pci_initfn;
+    k->exit = usb_ehci_pci_exit;
     k->class_id = PCI_CLASS_SERIAL_USB;
     k->config_write = usb_ehci_pci_write_config;
     dc->hotpluggable = false;
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 448e0073dd..bacb7ceac9 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2468,7 +2468,34 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
     s->device = dev;
 
     qemu_register_reset(ehci_reset, s);
-    qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
+    s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
+}
+
+void usb_ehci_unrealize(EHCIState *s, DeviceState *dev, Error **errp)
+{
+    trace_usb_ehci_unrealize();
+
+    if (s->frame_timer) {
+        timer_del(s->frame_timer);
+        timer_free(s->frame_timer);
+        s->frame_timer = NULL;
+    }
+    if (s->async_bh) {
+        qemu_bh_delete(s->async_bh);
+    }
+
+    ehci_queues_rip_all(s, 0);
+    ehci_queues_rip_all(s, 1);
+
+    memory_region_del_subregion(&s->mem, &s->mem_caps);
+    memory_region_del_subregion(&s->mem, &s->mem_opreg);
+    memory_region_del_subregion(&s->mem, &s->mem_ports);
+
+    usb_bus_release(&s->bus);
+
+    if (s->vmstate) {
+        qemu_del_vm_change_state_handler(s->vmstate);
+    }
 }
 
 void usb_ehci_init(EHCIState *s, DeviceState *dev)
diff --git a/hw/usb/hcd-ehci.h b/hw/usb/hcd-ehci.h
index 1ad4b96cce..4858b7e80c 100644
--- a/hw/usb/hcd-ehci.h
+++ b/hw/usb/hcd-ehci.h
@@ -316,12 +316,14 @@ struct EHCIState {
     uint32_t async_stepdown;
     uint32_t periodic_sched_active;
     bool int_req_by_async;
+    VMChangeStateEntry *vmstate;
 };
 
 extern const VMStateDescription vmstate_ehci;
 
 void usb_ehci_init(EHCIState *s, DeviceState *dev);
 void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp);
+void usb_ehci_unrealize(EHCIState *s, DeviceState *dev, Error **errp);
 
 #define TYPE_PCI_EHCI "pci-ehci-usb"
 #define PCI_EHCI(obj) OBJECT_CHECK(EHCIPCIState, (obj), TYPE_PCI_EHCI)
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 13afdf5919..83bec34185 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -619,8 +619,8 @@ static inline int ohci_put_td(OHCIState *ohci,
 static inline int ohci_put_iso_td(OHCIState *ohci,
                                   dma_addr_t addr, struct ohci_iso_td *td)
 {
-    return put_dwords(ohci, addr, (uint32_t *)td, 4 ||
-           put_words(ohci, addr + 16, td->offset, 8));
+    return put_dwords(ohci, addr, (uint32_t *)td, 4) ||
+           put_words(ohci, addr + 16, td->offset, 8);
 }
 
 static inline int ohci_put_hcca(OHCIState *ohci,
@@ -1371,8 +1371,10 @@ static int ohci_bus_start(OHCIState *ohci)
 /* Stop sending SOF tokens on the bus */
 static void ohci_bus_stop(OHCIState *ohci)
 {
-    if (ohci->eof_timer)
+    if (ohci->eof_timer) {
         timer_del(ohci->eof_timer);
+        timer_free(ohci->eof_timer);
+    }
     ohci->eof_timer = NULL;
 }
 
@@ -1952,6 +1954,24 @@ static int usb_ohci_initfn_pci(PCIDevice *dev)
     return 0;
 }
 
+static void usb_ohci_exit(PCIDevice *dev)
+{
+    OHCIPCIState *ohci = PCI_OHCI(dev);
+    OHCIState *s = &ohci->state;
+
+    ohci_bus_stop(s);
+
+    if (s->async_td) {
+        usb_cancel_packet(&s->usb_packet);
+        s->async_td = 0;
+    }
+    ohci_stop_endpoints(s);
+
+    if (!ohci->masterbus) {
+        usb_bus_release(&s->bus);
+    }
+}
+
 #define TYPE_SYSBUS_OHCI "sysbus-ohci"
 #define SYSBUS_OHCI(obj) OBJECT_CHECK(OHCISysBusState, (obj), TYPE_SYSBUS_OHCI)
 
@@ -2089,6 +2109,7 @@ static void ohci_pci_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->init = usb_ohci_initfn_pci;
+    k->exit = usb_ohci_exit;
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->device_id = PCI_DEVICE_ID_APPLE_IPID_USB;
     k->class_id = PCI_CLASS_SERIAL_USB;
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index ee5f112d65..3b3ebcda8b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -1256,6 +1256,29 @@ static int usb_uhci_vt82c686b_initfn(PCIDevice *dev)
     return usb_uhci_common_initfn(dev);
 }
 
+static void usb_uhci_exit(PCIDevice *dev)
+{
+    UHCIState *s = DO_UPCAST(UHCIState, dev, dev);
+
+    trace_usb_uhci_exit();
+
+    if (s->frame_timer) {
+        timer_del(s->frame_timer);
+        timer_free(s->frame_timer);
+        s->frame_timer = NULL;
+    }
+
+    if (s->bh) {
+        qemu_bh_delete(s->bh);
+    }
+
+    uhci_async_cancel_all(s);
+
+    if (!s->masterbus) {
+        usb_bus_release(&s->bus);
+    }
+}
+
 static Property uhci_properties[] = {
     DEFINE_PROP_STRING("masterbus", UHCIState, masterbus),
     DEFINE_PROP_UINT32("firstport", UHCIState, firstport, 0),
@@ -1272,6 +1295,7 @@ static void uhci_class_init(ObjectClass *klass, void *data)
     UHCIInfo *info = data;
 
     k->init = info->initfn ? info->initfn : usb_uhci_common_initfn;
+    k->exit = info->unplug ? usb_uhci_exit : NULL;
     k->vendor_id = info->vendor_id;
     k->device_id = info->device_id;
     k->revision  = info->revision;
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 58c4b11527..bbe4c5fb85 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1151,7 +1151,7 @@ static void xhci_reset_streams(XHCIEPContext *epctx)
 static void xhci_alloc_streams(XHCIEPContext *epctx, dma_addr_t base)
 {
     assert(epctx->pstreams == NULL);
-    epctx->nr_pstreams = 2 << (epctx->max_pstreams + 1);
+    epctx->nr_pstreams = 2 << epctx->max_pstreams;
     epctx->pstreams = xhci_alloc_stream_contexts(epctx->nr_pstreams, base);
 }
 
@@ -1180,7 +1180,7 @@ static int xhci_epmask_to_eps_with_streams(XHCIState *xhci,
     slot = &xhci->slots[slotid - 1];
 
     for (i = 2, j = 0; i <= 31; i++) {
-        if (!(epmask & (1 << i))) {
+        if (!(epmask & (1u << i))) {
             continue;
         }
 
@@ -1380,14 +1380,11 @@ static void xhci_init_epctx(XHCIEPContext *epctx,
     dequeue = xhci_addr64(ctx[2] & ~0xf, ctx[3]);
 
     epctx->type = (ctx[1] >> EP_TYPE_SHIFT) & EP_TYPE_MASK;
-    DPRINTF("xhci: endpoint %d.%d type is %d\n", epid/2, epid%2, epctx->type);
     epctx->pctx = pctx;
     epctx->max_psize = ctx[1]>>16;
     epctx->max_psize *= 1+((ctx[1]>>8)&0xff);
     epctx->max_pstreams = (ctx[0] >> 10) & 0xf;
     epctx->lsa = (ctx[0] >> 15) & 1;
-    DPRINTF("xhci: endpoint %d.%d max transaction (burst) size is %d\n",
-            epid/2, epid%2, epctx->max_psize);
     if (epctx->max_pstreams) {
         xhci_alloc_streams(epctx, dequeue);
     } else {
@@ -1418,6 +1415,9 @@ static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid,
     slot->eps[epid-1] = epctx;
     xhci_init_epctx(epctx, pctx, ctx);
 
+    DPRINTF("xhci: endpoint %d.%d type is %d, max transaction (burst) "
+            "size is %d\n", epid/2, epid%2, epctx->type, epctx->max_psize);
+
     epctx->mfindex_last = 0;
 
     epctx->state = EP_RUNNING;
@@ -2465,7 +2465,7 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
     res = xhci_alloc_device_streams(xhci, slotid, ictl_ctx[1]);
     if (res != CC_SUCCESS) {
         for (i = 2; i <= 31; i++) {
-            if (ictl_ctx[1] & (1 << i)) {
+            if (ictl_ctx[1] & (1u << i)) {
                 xhci_disable_ep(xhci, slotid, i);
             }
         }
@@ -3644,6 +3644,43 @@ static int usb_xhci_initfn(struct PCIDevice *dev)
     return 0;
 }
 
+static void usb_xhci_exit(PCIDevice *dev)
+{
+    int i;
+    XHCIState *xhci = XHCI(dev);
+
+    trace_usb_xhci_exit();
+
+    for (i = 0; i < xhci->numslots; i++) {
+        xhci_disable_slot(xhci, i + 1);
+    }
+
+    if (xhci->mfwrap_timer) {
+        timer_del(xhci->mfwrap_timer);
+        timer_free(xhci->mfwrap_timer);
+        xhci->mfwrap_timer = NULL;
+    }
+
+    memory_region_del_subregion(&xhci->mem, &xhci->mem_cap);
+    memory_region_del_subregion(&xhci->mem, &xhci->mem_oper);
+    memory_region_del_subregion(&xhci->mem, &xhci->mem_runtime);
+    memory_region_del_subregion(&xhci->mem, &xhci->mem_doorbell);
+
+    for (i = 0; i < xhci->numports; i++) {
+        XHCIPort *port = &xhci->ports[i];
+        memory_region_del_subregion(&xhci->mem, &port->mem);
+    }
+
+    /* destroy msix memory region */
+    if (dev->msix_table && dev->msix_pba
+        && dev->msix_entry_used) {
+        memory_region_del_subregion(&xhci->mem, &dev->msix_table_mmio);
+        memory_region_del_subregion(&xhci->mem, &dev->msix_pba_mmio);
+    }
+
+    usb_bus_release(&xhci->bus);
+}
+
 static int usb_xhci_post_load(void *opaque, int version_id)
 {
     XHCIState *xhci = opaque;
@@ -3836,6 +3873,7 @@ static void xhci_class_init(ObjectClass *klass, void *data)
     dc->hotpluggable   = false;
     set_bit(DEVICE_CATEGORY_USB, dc->categories);
     k->init         = usb_xhci_initfn;
+    k->exit         = usb_xhci_exit;
     k->vendor_id    = PCI_VENDOR_ID_NEC;
     k->device_id    = PCI_DEVICE_ID_NEC_UPD720200;
     k->class_id     = PCI_CLASS_SERIAL_USB;
diff --git a/include/hw/usb.h b/include/hw/usb.h
index 223a5aef8f..6b32a3bb70 100644
--- a/include/hw/usb.h
+++ b/include/hw/usb.h
@@ -529,6 +529,7 @@ struct USBBusOps {
 
 void usb_bus_new(USBBus *bus, size_t bus_size,
                  USBBusOps *ops, DeviceState *host);
+void usb_bus_release(USBBus *bus);
 USBBus *usb_bus_find(int busnr);
 void usb_legacy_register(const char *typename, const char *usbdevice_name,
                          USBDevice *(*usbdevice_init)(USBBus *bus,
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 8098b8d357..51bedc8262 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -191,8 +191,8 @@ typedef struct CPUARMState {
         uint64_t par_el1;  /* Translation result. */
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
-        uint32_t c9_pmcr; /* performance monitor control register */
-        uint32_t c9_pmcnten; /* perf monitor counter enables */
+        uint64_t c9_pmcr; /* performance monitor control register */
+        uint64_t c9_pmcnten; /* perf monitor counter enables */
         uint32_t c9_pmovsr; /* perf monitor overflow status */
         uint32_t c9_pmxevtyper; /* perf monitor event type */
         uint32_t c9_pmuserenr; /* perf monitor user enable */
@@ -224,7 +224,8 @@ typedef struct CPUARMState {
         /* If the counter is enabled, this stores the last time the counter
          * was reset. Otherwise it stores the counter value
          */
-        uint32_t c15_ccnt;
+        uint64_t c15_ccnt;
+        uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
     } cp15;
 
     struct {
@@ -352,6 +353,17 @@ int cpu_arm_signal_handler(int host_signum, void *pinfo,
 int arm_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw,
                              int mmu_idx);
 
+/**
+ * pmccntr_sync
+ * @env: CPUARMState
+ *
+ * Synchronises the counter in the PMCCNTR. This must always be called twice,
+ * once before any action that might affect the timer and again afterwards.
+ * The function is used to swap the state of the register if required.
+ * This only happens when not in user mode (!CONFIG_USER_ONLY)
+ */
+void pmccntr_sync(CPUARMState *env);
+
 /* SCTLR bit meanings. Several bits have been reused in newer
  * versions of the architecture; in that case we define constants
  * for both old and new bit meanings. Code which tests against those
@@ -1255,7 +1267,14 @@ static inline bool arm_singlestep_active(CPUARMState *env)
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
-    int fpen = extract32(env->cp15.c1_coproc, 20, 2);
+    int fpen;
+
+    if (arm_feature(env, ARM_FEATURE_V6)) {
+        fpen = extract32(env->cp15.c1_coproc, 20, 2);
+    } else {
+        /* CPACR doesn't exist before v6, so VFP is always accessible */
+        fpen = 3;
+    }
 
     if (is_a64(env)) {
         *pc = env->pc;
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 38d2b8445a..aa42803959 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -123,9 +123,10 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->id_isar2 = 0x21232042;
     cpu->id_isar3 = 0x01112131;
     cpu->id_isar4 = 0x00011142;
+    cpu->id_isar5 = 0x00011121;
     cpu->id_aa64pfr0 = 0x00002222;
     cpu->id_aa64dfr0 = 0x10305106;
-    cpu->id_aa64isar0 = 0x00010000;
+    cpu->id_aa64isar0 = 0x00011120;
     cpu->id_aa64mmfr0 = 0x00001124;
     cpu->dbgdidr = 0x3516d000;
     cpu->clidr = 0x0a200023;
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 2a77c97c7b..2b95f33872 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -548,25 +548,40 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri)
 }
 
 #ifndef CONFIG_USER_ONLY
-static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                       uint64_t value)
+
+static inline bool arm_ccnt_enabled(CPUARMState *env)
 {
-    /* Don't computer the number of ticks in user mode */
-    uint32_t temp_ticks;
+    /* This does not support checking PMCCFILTR_EL0 register */
 
-    temp_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
-                  get_ticks_per_sec() / 1000000;
+    if (!(env->cp15.c9_pmcr & PMCRE)) {
+        return false;
+    }
 
-    if (env->cp15.c9_pmcr & PMCRE) {
-        /* If the counter is enabled */
-        if (env->cp15.c9_pmcr & PMCRD) {
-            /* Increment once every 64 processor clock cycles */
-            env->cp15.c15_ccnt = (temp_ticks/64) - env->cp15.c15_ccnt;
-        } else {
-            env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
-        }
+    return true;
+}
+
+void pmccntr_sync(CPUARMState *env)
+{
+    uint64_t temp_ticks;
+
+    temp_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
+                          get_ticks_per_sec(), 1000000);
+
+    if (env->cp15.c9_pmcr & PMCRD) {
+        /* Increment once every 64 processor clock cycles */
+        temp_ticks /= 64;
     }
 
+    if (arm_ccnt_enabled(env)) {
+        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+    }
+}
+
+static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                       uint64_t value)
+{
+    pmccntr_sync(env);
+
     if (value & PMCRC) {
         /* The counter has been reset */
         env->cp15.c15_ccnt = 0;
@@ -576,26 +591,20 @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
 
-    if (env->cp15.c9_pmcr & PMCRE) {
-        if (env->cp15.c9_pmcr & PMCRD) {
-            /* Increment once every 64 processor clock cycles */
-            temp_ticks /= 64;
-        }
-        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
-    }
+    pmccntr_sync(env);
 }
 
 static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    uint32_t total_ticks;
+    uint64_t total_ticks;
 
-    if (!(env->cp15.c9_pmcr & PMCRE)) {
+    if (!arm_ccnt_enabled(env)) {
         /* Counter is disabled, do not change value */
         return env->cp15.c15_ccnt;
     }
 
-    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
-                  get_ticks_per_sec() / 1000000;
+    total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
+                           get_ticks_per_sec(), 1000000);
 
     if (env->cp15.c9_pmcr & PMCRD) {
         /* Increment once every 64 processor clock cycles */
@@ -607,16 +616,16 @@ static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
-    uint32_t total_ticks;
+    uint64_t total_ticks;
 
-    if (!(env->cp15.c9_pmcr & PMCRE)) {
+    if (!arm_ccnt_enabled(env)) {
         /* Counter is disabled, set the absolute value */
         env->cp15.c15_ccnt = value;
         return;
     }
 
-    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
-                  get_ticks_per_sec() / 1000000;
+    total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
+                           get_ticks_per_sec(), 1000000);
 
     if (env->cp15.c9_pmcr & PMCRD) {
         /* Increment once every 64 processor clock cycles */
@@ -624,8 +633,31 @@ static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     }
     env->cp15.c15_ccnt = total_ticks - value;
 }
+
+static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    uint64_t cur_val = pmccntr_read(env, NULL);
+
+    pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value));
+}
+
+#else /* CONFIG_USER_ONLY */
+
+void pmccntr_sync(CPUARMState *env)
+{
+}
+
 #endif
 
+static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    pmccntr_sync(env);
+    env->cp15.pmccfiltr_el0 = value & 0x7E000000;
+    pmccntr_sync(env);
+}
+
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
@@ -728,16 +760,28 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
      * or PL0_RO as appropriate and then check PMUSERENR in the helper fn.
      */
     { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1,
-      .access = PL0_RW, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
+      .access = PL0_RW, .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten),
       .writefn = pmcntenset_write,
       .accessfn = pmreg_access,
       .raw_writefn = raw_write },
+    { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0,
+      .writefn = pmcntenset_write, .raw_writefn = raw_write },
     { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2,
-      .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
+      .access = PL0_RW,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten),
       .accessfn = pmreg_access,
       .writefn = pmcntenclr_write,
       .type = ARM_CP_NO_MIGRATE },
+    { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
+      .writefn = pmcntenclr_write },
     { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3,
       .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
       .accessfn = pmreg_access,
@@ -755,9 +799,21 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
 #ifndef CONFIG_USER_ONLY
     { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
       .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO,
-      .readfn = pmccntr_read, .writefn = pmccntr_write,
+      .readfn = pmccntr_read, .writefn = pmccntr_write32,
       .accessfn = pmreg_access },
+    { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .type = ARM_CP_IO,
+      .readfn = pmccntr_read, .writefn = pmccntr_write, },
 #endif
+    { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7,
+      .writefn = pmccfiltr_write,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .type = ARM_CP_IO,
+      .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0),
+      .resetvalue = 0, },
     { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,
       .access = PL0_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
@@ -2386,13 +2442,23 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 #ifndef CONFIG_USER_ONLY
         ARMCPRegInfo pmcr = {
             .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
-            .access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
-            .type = ARM_CP_IO,
-            .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
+            .access = PL0_RW,
+            .type = ARM_CP_IO | ARM_CP_NO_MIGRATE,
+            .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr),
             .accessfn = pmreg_access, .writefn = pmcr_write,
             .raw_writefn = raw_write,
         };
+        ARMCPRegInfo pmcr64 = {
+            .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64,
+            .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0,
+            .access = PL0_RW, .accessfn = pmreg_access,
+            .type = ARM_CP_IO,
+            .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
+            .resetvalue = cpu->midr & 0xff000000,
+            .writefn = pmcr_write, .raw_writefn = raw_write,
+        };
         define_one_arm_cp_reg(cpu, &pmcr);
+        define_one_arm_cp_reg(cpu, &pmcr64);
 #endif
         ARMCPRegInfo clidr = {
             .name = "CLIDR", .state = ARM_CP_STATE_BOTH,
diff --git a/tests/Makefile b/tests/Makefile
index 837e9c89e7..469c0a5e44 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -155,11 +155,16 @@ check-qtest-i386-y += tests/i82801b11-test$(EXESUF)
 gcov-files-i386-y += hw/pci-bridge/i82801b11.c
 check-qtest-i386-y += tests/ioh3420-test$(EXESUF)
 gcov-files-i386-y += hw/pci-bridge/ioh3420.c
+check-qtest-i386-y += tests/usb-hcd-ohci-test$(EXESUF)
+gcov-files-i386-y += hw/usb/hcd-ohci.c
+check-qtest-i386-y += tests/usb-hcd-uhci-test$(EXESUF)
+gcov-files-i386-y += hw/usb/hcd-uhci.c
 check-qtest-i386-y += tests/usb-hcd-ehci-test$(EXESUF)
 gcov-files-i386-y += hw/usb/hcd-ehci.c
-gcov-files-i386-y += hw/usb/hcd-uhci.c
 gcov-files-i386-y += hw/usb/dev-hid.c
 gcov-files-i386-y += hw/usb/dev-storage.c
+check-qtest-i386-y += tests/usb-hcd-xhci-test$(EXESUF)
+gcov-files-i386-y += hw/usb/hcd-xhci.c
 check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
@@ -335,7 +340,10 @@ tests/ac97-test$(EXESUF): tests/ac97-test.o
 tests/es1370-test$(EXESUF): tests/es1370-test.o
 tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o
 tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o
+tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o
+tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o
 tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-pc-obj-y)
+tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o
 tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a
diff --git a/tests/usb-hcd-ohci-test.c b/tests/usb-hcd-ohci-test.c
new file mode 100644
index 0000000000..fbc3ffeebd
--- /dev/null
+++ b/tests/usb-hcd-ohci-test.c
@@ -0,0 +1,35 @@
+/*
+ * QTest testcase for USB OHCI controller
+ *
+ * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <string.h>
+#include "libqtest.h"
+#include "qemu/osdep.h"
+
+
+static void test_ohci_init(void)
+{
+    qtest_start("-device pci-ohci,id=ohci");
+
+    qtest_end();
+}
+
+
+int main(int argc, char **argv)
+{
+    int ret;
+
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("/ohci/pci/init", test_ohci_init);
+
+    ret = g_test_run();
+
+    return ret;
+}
diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c
new file mode 100644
index 0000000000..94e858f4c6
--- /dev/null
+++ b/tests/usb-hcd-uhci-test.c
@@ -0,0 +1,35 @@
+/*
+ * QTest testcase for USB UHCI controller
+ *
+ * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <string.h>
+#include "libqtest.h"
+#include "qemu/osdep.h"
+
+
+static void test_uhci_init(void)
+{
+    qtest_start("-device piix3-usb-uhci,id=uhci");
+
+    qtest_end();
+}
+
+
+int main(int argc, char **argv)
+{
+    int ret;
+
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("/uhci/pci/init", test_uhci_init);
+
+    ret = g_test_run();
+
+    return ret;
+}
diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c
new file mode 100644
index 0000000000..743e9798cd
--- /dev/null
+++ b/tests/usb-hcd-xhci-test.c
@@ -0,0 +1,35 @@
+/*
+ * QTest testcase for USB xHCI controller
+ *
+ * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <string.h>
+#include "libqtest.h"
+#include "qemu/osdep.h"
+
+
+static void test_xhci_init(void)
+{
+    qtest_start("-device nec-usb-xhci,id=xhci");
+
+    qtest_end();
+}
+
+
+int main(int argc, char **argv)
+{
+    int ret;
+
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("/xhci/pci/init", test_xhci_init);
+
+    ret = g_test_run();
+
+    return ret;
+}
diff --git a/trace-events b/trace-events
index 81bc915edd..03ac5d205c 100644
--- a/trace-events
+++ b/trace-events
@@ -297,6 +297,7 @@ usb_port_release(int bus, const char *port) "bus %d, port %s"
 
 # hw/usb/hcd-ehci.c
 usb_ehci_reset(void) "=== RESET ==="
+usb_ehci_unrealize(void) "=== UNREALIZE ==="
 usb_ehci_opreg_read(uint32_t addr, const char *str, uint32_t val) "rd mmio %04x [%s] = %x"
 usb_ehci_opreg_write(uint32_t addr, const char *str, uint32_t val) "wr mmio %04x [%s] = %x"
 usb_ehci_opreg_change(uint32_t addr, const char *str, uint32_t new, uint32_t old) "ch mmio %04x [%s] = %x (old: %x)"
@@ -329,6 +330,7 @@ usb_ehci_dma_error(void) ""
 
 # hw/usb/hcd-uhci.c
 usb_uhci_reset(void) "=== RESET ==="
+usb_uhci_exit(void) "=== EXIT ==="
 usb_uhci_schedule_start(void) ""
 usb_uhci_schedule_stop(void) ""
 usb_uhci_frame_start(uint32_t num) "nr %d"
@@ -358,6 +360,7 @@ usb_uhci_td_complete(uint32_t qh, uint32_t td) "qh 0x%x, td 0x%x"
 
 # hw/usb/hcd-xhci.c
 usb_xhci_reset(void) "=== RESET ==="
+usb_xhci_exit(void) "=== EXIT ==="
 usb_xhci_run(void) ""
 usb_xhci_stop(void) ""
 usb_xhci_cap_read(uint32_t off, uint32_t val) "off 0x%04x, ret 0x%08x"