Diffstat (limited to 'target-ppc')

 target-ppc/STATUS           |    9
 target-ppc/cpu-models.c     |    2
 target-ppc/cpu-models.h     |    1
 target-ppc/cpu-qom.h        |    2
 target-ppc/cpu.h            |   55
 target-ppc/fpu_helper.c     | 1294
 target-ppc/helper.h         |  207
 target-ppc/int_helper.c     | 1411
 target-ppc/kvm.c            |  117
 target-ppc/kvm_ppc.h        |   35
 target-ppc/machine.c        |   11
 target-ppc/misc_helper.c    |    4
 target-ppc/mmu-hash64.c     |  117
 target-ppc/mmu-hash64.h     |   47
 target-ppc/mmu_helper.c     |    3
 target-ppc/translate.c      | 1268
 target-ppc/translate_init.c |  274
 17 files changed, 4225 insertions(+), 632 deletions(-)
diff --git a/target-ppc/STATUS b/target-ppc/STATUS
index c8e9018bfc..a4d48a7ca2 100644
--- a/target-ppc/STATUS
+++ b/target-ppc/STATUS
@@ -377,15 +377,6 @@ MMU   OK
 EXCP  KO partially implemented
 Remarks: Should be able to boot but there is no hw platform currently emulated.
 
-PowerPC 970GX:
-INSN  KO Altivec missing and more
-SPR   KO
-MSR   ?
-IRQ   OK
-MMU   OK
-EXCP  KO partially implemented
-Remarks: Should be able to boot but there is no hw platform currently emulated.
-
 PowerPC Cell:
 INSN  KO Altivec missing and more
 SPR   KO
diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index 7c9466fc07..f6c9b3ab01 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1156,8 +1156,6 @@
                 "PowerPC 970FX v3.0 (G5)")
     POWERPC_DEF("970fx_v3.1",    CPU_POWERPC_970FX_v31,              970FX,
                 "PowerPC 970FX v3.1 (G5)")
-    POWERPC_DEF("970gx",         CPU_POWERPC_970GX,                  970GX,
-                "PowerPC 970GX (G5)")
     POWERPC_DEF("970mp_v1.0",    CPU_POWERPC_970MP_v10,              970MP,
                 "PowerPC 970MP v1.0")
     POWERPC_DEF("970mp_v1.1",    CPU_POWERPC_970MP_v11,              970MP,
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index 49ba4a4522..644a126459 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -570,7 +570,6 @@ enum {
     CPU_POWERPC_970FX_v21          = 0x003C0201,
     CPU_POWERPC_970FX_v30          = 0x003C0300,
     CPU_POWERPC_970FX_v31          = 0x003C0301,
-    CPU_POWERPC_970GX              = 0x00450000,
     CPU_POWERPC_970MP_v10          = 0x00440100,
     CPU_POWERPC_970MP_v11          = 0x00440101,
 #define CPU_POWERPC_CELL             CPU_POWERPC_CELL_v32
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 72b22329b0..b17c024543 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -79,6 +79,7 @@ typedef struct PowerPCCPUClass {
 /**
  * PowerPCCPU:
  * @env: #CPUPPCState
+ * @cpu_dt_id: CPU index used in the device tree. KVM uses this index too
  *
  * A PowerPC CPU.
  */
@@ -88,6 +89,7 @@ typedef struct PowerPCCPU {
     /*< public >*/
 
     CPUPPCState env;
+    int cpu_dt_id;
 } PowerPCCPU;
 
 static inline PowerPCCPU *ppc_env_get_cpu(CPUPPCState *env)
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index bb847676a5..afab267485 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -352,6 +352,10 @@ union ppc_avr_t {
     int16_t s16[8];
     int32_t s32[4];
     uint64_t u64[2];
+    int64_t s64[2];
+#ifdef CONFIG_INT128
+    __uint128_t u128;
+#endif
 };
 
 #if !defined(CONFIG_USER_ONLY)
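The new s64 and u128 members let Altivec helpers view a vector register as signed doublewords or, when the host compiler provides __int128, as a single 128-bit integer, which the ISA 2.07 quadword arithmetic enabled later in this series relies on. A minimal standalone sketch of the same union layout, assuming a GCC/Clang-style __uint128_t and a little-endian host (this is illustrative, not QEMU's full definition):

    #include <stdint.h>
    #include <stdio.h>

    /* Assumes __uint128_t exists; u64[0] is treated as the low
     * doubleword, which matches a little-endian host. */
    typedef union {
        uint64_t u64[2];
        int64_t  s64[2];
        __uint128_t u128;
    } avr_t;

    int main(void)
    {
        avr_t a = { .u64 = { 0xFFFFFFFFFFFFFFFFULL, 0 } };
        a.u128 += 1;   /* carry propagates from u64[0] into u64[1] */
        printf("%016llx %016llx\n",
               (unsigned long long)a.u64[1], (unsigned long long)a.u64[0]);
        /* prints: 0000000000000001 0000000000000000 */
        return 0;
    }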
@@ -926,6 +930,7 @@ struct CPUPPCState {
     target_ulong reserve_addr;
     /* Reservation value */
     target_ulong reserve_val;
+    target_ulong reserve_val2;
     /* Reservation store address */
     target_ulong reserve_ea;
     /* Reserved store source register and size */
@@ -961,6 +966,7 @@ struct CPUPPCState {
 #endif
     /* segment registers */
     hwaddr htab_base;
+    /* mask used to normalize hash value to PTEG index */
     hwaddr htab_mask;
     target_ulong sr[32];
     /* externally stored hash table */
@@ -1250,7 +1256,7 @@ static inline int cpu_mmu_index (CPUPPCState *env)
 #define SPR_MPC_EIE           (0x050)
 #define SPR_MPC_EID           (0x051)
 #define SPR_MPC_NRI           (0x052)
-#define SPR_CTRL              (0x088)
+#define SPR_UCTRL             (0x088)
 #define SPR_MPC_CMPA          (0x090)
 #define SPR_MPC_CMPB          (0x091)
 #define SPR_MPC_CMPC          (0x092)
@@ -1259,7 +1265,7 @@ static inline int cpu_mmu_index (CPUPPCState *env)
 #define SPR_MPC_DER           (0x095)
 #define SPR_MPC_COUNTA        (0x096)
 #define SPR_MPC_COUNTB        (0x097)
-#define SPR_UCTRL             (0x098)
+#define SPR_CTRL              (0x098)
 #define SPR_MPC_CMPE          (0x098)
 #define SPR_MPC_CMPF          (0x099)
 #define SPR_MPC_CMPG          (0x09A)
@@ -1322,12 +1328,12 @@ static inline int cpu_mmu_index (CPUPPCState *env)
 #define SPR_BOOKE_IAC3        (0x13A)
 #define SPR_HSRR1             (0x13B)
 #define SPR_BOOKE_IAC4        (0x13B)
-#define SPR_LPCR              (0x13C)
 #define SPR_BOOKE_DAC1        (0x13C)
 #define SPR_LPIDR             (0x13D)
 #define SPR_DABR2             (0x13D)
 #define SPR_BOOKE_DAC2        (0x13D)
 #define SPR_BOOKE_DVC1        (0x13E)
+#define SPR_LPCR              (0x13E)
 #define SPR_BOOKE_DVC2        (0x13F)
 #define SPR_BOOKE_TSR         (0x150)
 #define SPR_BOOKE_TCR         (0x154)
@@ -1508,6 +1514,7 @@ static inline int cpu_mmu_index (CPUPPCState *env)
 #define SPR_RCPU_L2U_RA2      (0x32A)
 #define SPR_MPC_MD_DBRAM1     (0x32A)
 #define SPR_RCPU_L2U_RA3      (0x32B)
+#define SPR_TAR               (0x32F)
 #define SPR_440_INV0          (0x370)
 #define SPR_440_INV1          (0x371)
 #define SPR_440_INV2          (0x372)
@@ -1875,9 +1882,31 @@ enum {
     PPC2_DBRX          = 0x0000000000000010ULL,
     /* Book I 2.05 PowerPC specification                                     */
     PPC2_ISA205        = 0x0000000000000020ULL,
+    /* VSX additions in ISA 2.07                                             */
+    PPC2_VSX207        = 0x0000000000000040ULL,
+    /* ISA 2.06B bpermd                                                      */
+    PPC2_PERM_ISA206   = 0x0000000000000080ULL,
+    /* ISA 2.06B divide extended variants                                    */
+    PPC2_DIVE_ISA206   = 0x0000000000000100ULL,
+    /* ISA 2.06B larx/stcx. instructions                                     */
+    PPC2_ATOMIC_ISA206 = 0x0000000000000200ULL,
+    /* ISA 2.06B floating point integer conversion                           */
+    PPC2_FP_CVT_ISA206 = 0x0000000000000400ULL,
+    /* ISA 2.06B floating point test instructions                            */
+    PPC2_FP_TST_ISA206 = 0x0000000000000800ULL,
+    /* ISA 2.07 bctar instruction                                            */
+    PPC2_BCTAR_ISA207  = 0x0000000000001000ULL,
+    /* ISA 2.07 load/store quadword                                          */
+    PPC2_LSQ_ISA207    = 0x0000000000002000ULL,
+    /* ISA 2.07 Altivec                                                      */
+    PPC2_ALTIVEC_207   = 0x0000000000004000ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
-  PPC2_ISA205)
+                        PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
+                        PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \
+                        PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
+                        PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
+                        PPC2_ALTIVEC_207)
 };
 
 /*****************************************************************************/
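These PPC2_* bits extend the insns_flags2 mask that each CPU model advertises and that the translator consults before accepting an instruction. A self-contained sketch of that gating pattern, with the constants copied from the enum above; the model value chosen here is hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    #define PPC2_DIVE_ISA206   0x0000000000000100ULL
    #define PPC2_BCTAR_ISA207  0x0000000000001000ULL
    #define PPC2_LSQ_ISA207    0x0000000000002000ULL

    int main(void)
    {
        /* Hypothetical ISA 2.06 model: divide-extended, no 2.07 bits. */
        uint64_t insns_flags2 = PPC2_DIVE_ISA206;

        printf("divde  supported: %s\n",
               (insns_flags2 & PPC2_DIVE_ISA206) ? "yes" : "no");
        printf("bctar  supported: %s\n",
               (insns_flags2 & PPC2_BCTAR_ISA207) ? "yes" : "no");
        printf("lq/stq supported: %s\n",
               (insns_flags2 & PPC2_LSQ_ISA207) ? "yes" : "no");
        return 0;
    }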
@@ -2154,4 +2183,22 @@ static inline bool cpu_has_work(CPUState *cpu)
 
 void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env);
 
+/**
+ * ppc_get_vcpu_dt_id:
+ * @cs: a PowerPCCPU struct.
+ *
+ * Returns a device-tree ID for a CPU.
+ */
+int ppc_get_vcpu_dt_id(PowerPCCPU *cpu);
+
+/**
+ * ppc_get_vcpu_by_dt_id:
+ * @cpu_dt_id: a device tree id
+ *
+ * Searches for a CPU by @cpu_dt_id.
+ *
+ * Returns: a PowerPCCPU struct
+ */
+PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id);
+
 #endif /* !defined (__CPU_PPC_H__) */
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 4f6021835f..e7f329566d 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -50,6 +50,16 @@ static inline int isden(float64 d)
     return ((u.ll >> 52) & 0x7FF) == 0;
 }
 
+static inline int ppc_float32_get_unbiased_exp(float32 f)
+{
+    return ((f >> 23) & 0xFF) - 127;
+}
+
+static inline int ppc_float64_get_unbiased_exp(float64 f)
+{
+    return ((f >> 52) & 0x7FF) - 1023;
+}
+
 uint32_t helper_compute_fprf(CPUPPCState *env, uint64_t arg, uint32_t set_fprf)
 {
     CPU_DoubleU farg;
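The two new helpers read the exponent field straight out of the encoding (bits 30-23 for float32, 62-52 for float64) and subtract the IEEE 754 bias; the ftdiv/ftsqrt helpers added below use them to classify operand ranges without raising any flags. A quick host-side check of the same arithmetic, using memcpy instead of softfloat types:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* For an IEEE 754 double, bits 62-52 hold the biased exponent
     * (bias 1023). */
    static int unbiased_exp64(double d)
    {
        uint64_t bits;
        memcpy(&bits, &d, sizeof(bits));
        return (int)((bits >> 52) & 0x7FF) - 1023;
    }

    int main(void)
    {
        printf("%d\n", unbiased_exp64(1.0));   /* 0 */
        printf("%d\n", unbiased_exp64(8.0));   /* 3 */
        printf("%d\n", unbiased_exp64(0.5));   /* -1 */
        return 0;
    }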
@@ -106,7 +116,8 @@ uint32_t helper_compute_fprf(CPUPPCState *env, uint64_t arg, uint32_t set_fprf)
 }
 
 /* Floating-point invalid operations exception */
-static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
+static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op,
+                                             int set_fpcc)
 {
     uint64_t ret = 0;
     int ve;
@@ -138,8 +149,10 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
     case POWERPC_EXCP_FP_VXVC:
         /* Ordered comparison of NaN */
         env->fpscr |= 1 << FPSCR_VXVC;
-        env->fpscr &= ~(0xF << FPSCR_FPCC);
-        env->fpscr |= 0x11 << FPSCR_FPCC;
+        if (set_fpcc) {
+            env->fpscr &= ~(0xF << FPSCR_FPCC);
+            env->fpscr |= 0x11 << FPSCR_FPCC;
+        }
         /* We must update the target FPR before raising the exception */
         if (ve != 0) {
             env->exception_index = POWERPC_EXCP_PROGRAM;
@@ -158,8 +171,10 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
         if (ve == 0) {
             /* Set the result to quiet NaN */
             ret = 0x7FF8000000000000ULL;
-            env->fpscr &= ~(0xF << FPSCR_FPCC);
-            env->fpscr |= 0x11 << FPSCR_FPCC;
+            if (set_fpcc) {
+                env->fpscr &= ~(0xF << FPSCR_FPCC);
+                env->fpscr |= 0x11 << FPSCR_FPCC;
+            }
         }
         break;
     case POWERPC_EXCP_FP_VXCVI:
@@ -169,8 +184,10 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
         if (ve == 0) {
             /* Set the result to quiet NaN */
             ret = 0x7FF8000000000000ULL;
-            env->fpscr &= ~(0xF << FPSCR_FPCC);
-            env->fpscr |= 0x11 << FPSCR_FPCC;
+            if (set_fpcc) {
+                env->fpscr &= ~(0xF << FPSCR_FPCC);
+                env->fpscr |= 0x11 << FPSCR_FPCC;
+            }
         }
         break;
     }
@@ -505,12 +522,12 @@ uint64_t helper_fadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) &&
                  float64_is_neg(farg1.d) != float64_is_neg(farg2.d))) {
         /* Magnitude subtraction of infinities */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d))) {
             /* sNaN addition */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         farg1.d = float64_add(farg1.d, farg2.d, &env->fp_status);
     }
@@ -529,12 +546,12 @@ uint64_t helper_fsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) &&
                  float64_is_neg(farg1.d) == float64_is_neg(farg2.d))) {
         /* Magnitude subtraction of infinities */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d))) {
             /* sNaN subtraction */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         farg1.d = float64_sub(farg1.d, farg2.d, &env->fp_status);
     }
@@ -553,12 +570,12 @@ uint64_t helper_fmul(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
                  (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
         /* Multiplication of zero by infinity */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d))) {
             /* sNaN multiplication */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         farg1.d = float64_mul(farg1.d, farg2.d, &env->fp_status);
     }
@@ -577,15 +594,15 @@ uint64_t helper_fdiv(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     if (unlikely(float64_is_infinity(farg1.d) &&
                  float64_is_infinity(farg2.d))) {
         /* Division of infinity by infinity */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, 1);
     } else if (unlikely(float64_is_zero(farg1.d) && float64_is_zero(farg2.d))) {
         /* Division of zero by zero */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d))) {
             /* sNaN division */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         farg1.d = float64_div(farg1.d, farg2.d, &env->fp_status);
     }
@@ -593,107 +610,63 @@ uint64_t helper_fdiv(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     return farg1.ll;
 }
 
-/* fctiw - fctiw. */
-uint64_t helper_fctiw(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
-
-    farg.ll = arg;
-
-    if (unlikely(float64_is_signaling_nan(farg.d))) {
-        /* sNaN conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
-                                        POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_quiet_nan(farg.d) ||
-                        float64_is_infinity(farg.d))) {
-        /* qNan / infinity conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
-    } else {
-        farg.ll = float64_to_int32(farg.d, &env->fp_status);
-        /* XXX: higher bits are not supposed to be significant.
-         *     to make tests easier, return the same as a real PowerPC 750
-         */
-        farg.ll |= 0xFFF80000ULL << 32;
-    }
-    return farg.ll;
-}
-
-/* fctiwz - fctiwz. */
-uint64_t helper_fctiwz(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
-
-    farg.ll = arg;
-
-    if (unlikely(float64_is_signaling_nan(farg.d))) {
-        /* sNaN conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
-                                        POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_quiet_nan(farg.d) ||
-                        float64_is_infinity(farg.d))) {
-        /* qNan / infinity conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
-    } else {
-        farg.ll = float64_to_int32_round_to_zero(farg.d, &env->fp_status);
-        /* XXX: higher bits are not supposed to be significant.
-         *     to make tests easier, return the same as a real PowerPC 750
-         */
-        farg.ll |= 0xFFF80000ULL << 32;
-    }
-    return farg.ll;
-}
 
+#define FPU_FCTI(op, cvt, nanval)                                      \
+uint64_t helper_##op(CPUPPCState *env, uint64_t arg)                   \
+{                                                                      \
+    CPU_DoubleU farg;                                                  \
+                                                                       \
+    farg.ll = arg;                                                     \
+    farg.ll = float64_to_##cvt(farg.d, &env->fp_status);               \
+                                                                       \
+    if (unlikely(env->fp_status.float_exception_flags)) {              \
+        if (float64_is_any_nan(arg)) {                                 \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1);      \
+            if (float64_is_signaling_nan(arg)) {                       \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \
+            }                                                          \
+            farg.ll = nanval;                                          \
+        } else if (env->fp_status.float_exception_flags &              \
+                   float_flag_invalid) {                               \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1);      \
+        }                                                              \
+        helper_float_check_status(env);                                \
+    }                                                                  \
+    return farg.ll;                                                    \
+ }
+
+FPU_FCTI(fctiw, int32, 0x80000000U)
+FPU_FCTI(fctiwz, int32_round_to_zero, 0x80000000U)
+FPU_FCTI(fctiwu, uint32, 0x00000000U)
+FPU_FCTI(fctiwuz, uint32_round_to_zero, 0x00000000U)
 #if defined(TARGET_PPC64)
-/* fcfid - fcfid. */
-uint64_t helper_fcfid(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
-
-    farg.d = int64_to_float64(arg, &env->fp_status);
-    return farg.ll;
-}
-
-/* fctid - fctid. */
-uint64_t helper_fctid(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
-
-    farg.ll = arg;
-
-    if (unlikely(float64_is_signaling_nan(farg.d))) {
-        /* sNaN conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
-                                        POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_quiet_nan(farg.d) ||
-                        float64_is_infinity(farg.d))) {
-        /* qNan / infinity conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
-    } else {
-        farg.ll = float64_to_int64(farg.d, &env->fp_status);
-    }
-    return farg.ll;
-}
-
-/* fctidz - fctidz. */
-uint64_t helper_fctidz(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
+FPU_FCTI(fctid, int64, 0x8000000000000000ULL)
+FPU_FCTI(fctidz, int64_round_to_zero, 0x8000000000000000ULL)
+FPU_FCTI(fctidu, uint64, 0x0000000000000000ULL)
+FPU_FCTI(fctiduz, uint64_round_to_zero, 0x0000000000000000ULL)
+#endif
 
-    farg.ll = arg;
+#if defined(TARGET_PPC64)
 
-    if (unlikely(float64_is_signaling_nan(farg.d))) {
-        /* sNaN conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
-                                        POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_quiet_nan(farg.d) ||
-                        float64_is_infinity(farg.d))) {
-        /* qNan / infinity conversion */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
-    } else {
-        farg.ll = float64_to_int64_round_to_zero(farg.d, &env->fp_status);
-    }
-    return farg.ll;
-}
+#define FPU_FCFI(op, cvtr, is_single)                      \
+uint64_t helper_##op(CPUPPCState *env, uint64_t arg)       \
+{                                                          \
+    CPU_DoubleU farg;                                      \
+                                                           \
+    if (is_single) {                                       \
+        float32 tmp = cvtr(arg, &env->fp_status);          \
+        farg.d = float32_to_float64(tmp, &env->fp_status); \
+    } else {                                               \
+        farg.d = cvtr(arg, &env->fp_status);               \
+    }                                                      \
+    helper_float_check_status(env);                        \
+    return farg.ll;                                        \
+}
+
+FPU_FCFI(fcfid, int64_to_float64, 0)
+FPU_FCFI(fcfids, int64_to_float32, 1)
+FPU_FCFI(fcfidu, uint64_to_float64, 0)
+FPU_FCFI(fcfidus, uint64_to_float32, 1)
 
 #endif
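The FPU_FCTI macro replaces four near-identical conversion helpers: it converts first, then inspects the accumulated softfloat flags and substitutes the per-instruction nanval when the source was a NaN. FPU_FCFI likewise folds the integer-to-float helpers, with the single-precision variants converting to float32 first so rounding happens at single precision before the result is widened. A plain-C sketch of both behaviors, using host math rather than QEMU's softfloat (so flag handling differs):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    /* fctiw-like saturation: NaN maps to the nanval (0x80000000 for
     * the signed word target), out-of-range values clamp. */
    static uint32_t fctiw_like(double d)
    {
        if (isnan(d)) {
            return 0x80000000U;                  /* nanval */
        }
        if (d >= 2147483647.0) {
            return 0x7FFFFFFFU;                  /* clamp high */
        }
        if (d <= -2147483648.0) {
            return 0x80000000U;                  /* clamp low */
        }
        return (uint32_t)(int32_t)llrint(d);     /* current rounding mode */
    }

    int main(void)
    {
        printf("fctiw(NaN)  = %08x\n", fctiw_like(nan("")));  /* 80000000 */
        printf("fctiw(1e30) = %08x\n", fctiw_like(1e30));     /* 7fffffff */

        /* Why fcfids rounds at single precision first: */
        int64_t v = (1LL << 29) + 1;      /* needs 30 significand bits */
        printf("fcfid  -> %.1f\n", (double)v);         /* 536870913.0 */
        printf("fcfids -> %.1f\n", (double)(float)v);  /* 536870912.0 */
        return 0;
    }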
 
@@ -706,24 +679,28 @@ static inline uint64_t do_fri(CPUPPCState *env, uint64_t arg,
 
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN round */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
-                                        POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_quiet_nan(farg.d) ||
-                        float64_is_infinity(farg.d))) {
-        /* qNan / infinity round */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
+        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
+        farg.ll = arg | 0x0008000000000000ULL;
     } else {
+        int inexact = get_float_exception_flags(&env->fp_status) &
+                      float_flag_inexact;
         set_float_rounding_mode(rounding_mode, &env->fp_status);
         farg.ll = float64_round_to_int(farg.d, &env->fp_status);
         /* Restore rounding mode from FPSCR */
         fpscr_set_rounding_mode(env);
+
+        /* fri* does not set FPSCR[XX] */
+        if (!inexact) {
+            env->fp_status.float_exception_flags &= ~float_flag_inexact;
+        }
     }
+    helper_float_check_status(env);
     return farg.ll;
 }
 
 uint64_t helper_frin(CPUPPCState *env, uint64_t arg)
 {
-    return do_fri(env, arg, float_round_nearest_even);
+    return do_fri(env, arg, float_round_ties_away);
 }
 
 uint64_t helper_friz(CPUPPCState *env, uint64_t arg)
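The helper_frin change above swaps float_round_nearest_even for float_round_ties_away, matching the ISA's "Round to Nearest Away" definition of frin. C's round() uses the same ties-away rule, while nearbyint() under the default rounding mode shows the old ties-to-even behavior:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        /* ties away from zero, as frin now behaves */
        printf("%.1f %.1f\n", round(0.5), round(2.5));          /* 1.0 3.0 */
        /* ties to even, as frin behaved before this patch */
        printf("%.1f %.1f\n", nearbyint(0.5), nearbyint(2.5));  /* 0.0 2.0 */
        return 0;
    }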
@@ -754,13 +731,13 @@ uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
     if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
                  (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
         /* Multiplication of zero by infinity */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d) ||
                      float64_is_signaling_nan(farg3.d))) {
             /* sNaN operation */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         /* This is the way the PowerPC specification defines it */
         float128 ft0_128, ft1_128;
@@ -772,7 +749,7 @@ uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                      float64_is_infinity(farg3.d) &&
                      float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
             /* Magnitude subtraction of infinities */
-            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
         } else {
             ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
             ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
@@ -797,13 +774,13 @@ uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                  (float64_is_zero(farg1.d) &&
                   float64_is_infinity(farg2.d)))) {
         /* Multiplication of zero by infinity */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d) ||
                      float64_is_signaling_nan(farg3.d))) {
             /* sNaN operation */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         /* This is the way the PowerPC specification defines it */
         float128 ft0_128, ft1_128;
@@ -815,7 +792,7 @@ uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                      float64_is_infinity(farg3.d) &&
                      float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
             /* Magnitude subtraction of infinities */
-            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
         } else {
             ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
             ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
@@ -838,13 +815,13 @@ uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
     if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
                  (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
         /* Multiplication of zero by infinity */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d) ||
                      float64_is_signaling_nan(farg3.d))) {
             /* sNaN operation */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         /* This is the way the PowerPC specification defines it */
         float128 ft0_128, ft1_128;
@@ -856,7 +833,7 @@ uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                      float64_is_infinity(farg3.d) &&
                      float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
             /* Magnitude subtraction of infinities */
-            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
         } else {
             ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
             ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
@@ -883,13 +860,13 @@ uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                  (float64_is_zero(farg1.d) &&
                   float64_is_infinity(farg2.d)))) {
         /* Multiplication of zero by infinity */
-        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+        farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d) ||
                      float64_is_signaling_nan(farg3.d))) {
             /* sNaN operation */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         /* This is the way the PowerPC specification defines it */
         float128 ft0_128, ft1_128;
@@ -901,7 +878,7 @@ uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                      float64_is_infinity(farg3.d) &&
                      float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
             /* Magnitude subtraction of infinities */
-            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+            farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
         } else {
             ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
             ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
@@ -924,7 +901,7 @@ uint64_t helper_frsp(CPUPPCState *env, uint64_t arg)
 
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN square root */
-        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
     }
     f32 = float64_to_float32(farg.d, &env->fp_status);
     farg.d = float32_to_float64(f32, &env->fp_status);
@@ -941,11 +918,11 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t arg)
 
     if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
         /* Square root of a negative nonzero number */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT);
+        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg.d))) {
             /* sNaN square root */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         farg.d = float64_sqrt(farg.d, &env->fp_status);
     }
@@ -961,7 +938,7 @@ uint64_t helper_fre(CPUPPCState *env, uint64_t arg)
 
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN reciprocal */
-        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
     }
     farg.d = float64_div(float64_one, farg.d, &env->fp_status);
     return farg.d;
@@ -977,7 +954,7 @@ uint64_t helper_fres(CPUPPCState *env, uint64_t arg)
 
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN reciprocal */
-        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
     }
     farg.d = float64_div(float64_one, farg.d, &env->fp_status);
     f32 = float64_to_float32(farg.d, &env->fp_status);
@@ -996,11 +973,11 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t arg)
 
     if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
         /* Reciprocal square root of a negative nonzero number */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT);
+        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
     } else {
         if (unlikely(float64_is_signaling_nan(farg.d))) {
             /* sNaN reciprocal square root */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
         }
         farg.d = float64_sqrt(farg.d, &env->fp_status);
         farg.d = float64_div(float64_one, farg.d, &env->fp_status);
@@ -1026,6 +1003,73 @@ uint64_t helper_fsel(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
     }
 }
 
+uint32_t helper_ftdiv(uint64_t fra, uint64_t frb)
+{
+    int fe_flag = 0;
+    int fg_flag = 0;
+
+    if (unlikely(float64_is_infinity(fra) ||
+                 float64_is_infinity(frb) ||
+                 float64_is_zero(frb))) {
+        fe_flag = 1;
+        fg_flag = 1;
+    } else {
+        int e_a = ppc_float64_get_unbiased_exp(fra);
+        int e_b = ppc_float64_get_unbiased_exp(frb);
+
+        if (unlikely(float64_is_any_nan(fra) ||
+                     float64_is_any_nan(frb))) {
+            fe_flag = 1;
+        } else if ((e_b <= -1022) || (e_b >= 1021)) {
+            fe_flag = 1;
+        } else if (!float64_is_zero(fra) &&
+                   (((e_a - e_b) >= 1023) ||
+                    ((e_a - e_b) <= -1021) ||
+                    (e_a <= -970))) {
+            fe_flag = 1;
+        }
+
+        if (unlikely(float64_is_zero_or_denormal(frb))) {
+            /* XB is not zero because of the above check and */
+            /* so must be denormalized.                      */
+            fg_flag = 1;
+        }
+    }
+
+    return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
+}
+
+uint32_t helper_ftsqrt(uint64_t frb)
+{
+    int fe_flag = 0;
+    int fg_flag = 0;
+
+    if (unlikely(float64_is_infinity(frb) || float64_is_zero(frb))) {
+        fe_flag = 1;
+        fg_flag = 1;
+    } else {
+        int e_b = ppc_float64_get_unbiased_exp(frb);
+
+        if (unlikely(float64_is_any_nan(frb))) {
+            fe_flag = 1;
+        } else if (unlikely(float64_is_zero(frb))) {
+            fe_flag = 1;
+        } else if (unlikely(float64_is_neg(frb))) {
+            fe_flag = 1;
+        } else if (!float64_is_zero(frb) && (e_b <= (-1022+52))) {
+            fe_flag = 1;
+        }
+
+        if (unlikely(float64_is_zero_or_denormal(frb))) {
+            /* XB is not zero because of the above check and */
+            /* therefore must be denormalized.               */
+            fg_flag = 1;
+        }
+    }
+
+    return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
+}
+
 void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                   uint32_t crfD)
 {
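In helper_ftdiv and helper_ftsqrt, once the infinity and zero operands have been excluded, any value still matching "zero or denormal" must be a denormal, which is why the comments can assert that before setting fg. The classification itself only looks at the exponent field, as this host-side check shows:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* A double is zero or denormal iff its exponent field is zero. */
    static int is_zero_or_denormal(double d)
    {
        uint64_t bits;
        memcpy(&bits, &d, sizeof(bits));
        return ((bits >> 52) & 0x7FF) == 0;
    }

    int main(void)
    {
        printf("%d\n", is_zero_or_denormal(0.0));     /* 1 */
        printf("%d\n", is_zero_or_denormal(5e-324));  /* 1: smallest denormal */
        printf("%d\n", is_zero_or_denormal(1.0));     /* 0 */
        return 0;
    }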
@@ -1053,7 +1097,7 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                  && (float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d)))) {
         /* sNaN comparison */
-        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
     }
 }
 
@@ -1085,10 +1129,10 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
             float64_is_signaling_nan(farg2.d)) {
             /* sNaN comparison */
             fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
-                                  POWERPC_EXCP_FP_VXVC);
+                                  POWERPC_EXCP_FP_VXVC, 1);
         } else {
             /* qNaN comparison */
-            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC);
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 1);
         }
     }
 }
@@ -1710,3 +1754,969 @@ uint32_t helper_efdcmpeq(CPUPPCState *env, uint64_t op1, uint64_t op2)
     /* XXX: TODO: test special values (NaN, infinites, ...) */
     return helper_efdtsteq(env, op1, op2);
 }
+
+#define DECODE_SPLIT(opcode, shift1, nb1, shift2, nb2) \
+    (((((opcode) >> (shift1)) & ((1 << (nb1)) - 1)) << nb2) |    \
+     (((opcode) >> (shift2)) & ((1 << (nb2)) - 1)))
+
+#define xT(opcode) DECODE_SPLIT(opcode, 0, 1, 21, 5)
+#define xA(opcode) DECODE_SPLIT(opcode, 2, 1, 16, 5)
+#define xB(opcode) DECODE_SPLIT(opcode, 1, 1, 11, 5)
+#define xC(opcode) DECODE_SPLIT(opcode, 3, 1,  6, 5)
+#define BF(opcode) (((opcode) >> (31-8)) & 7)
+
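VSX doubles the architected register file to 64 registers, so the usual 5-bit register fields are widened by one extension bit stored elsewhere in the opcode; DECODE_SPLIT glues the two pieces back together. For xT, the TX bit (opcode bit 0) supplies the high bit above the 5-bit T field. A standalone check of that decode (the sample opcode value is made up):

    #include <stdint.h>
    #include <stdio.h>

    #define DECODE_SPLIT(opcode, shift1, nb1, shift2, nb2)         \
        (((((opcode) >> (shift1)) & ((1 << (nb1)) - 1)) << (nb2)) | \
         (((opcode) >> (shift2)) & ((1 << (nb2)) - 1)))

    #define xT(opcode) DECODE_SPLIT(opcode, 0, 1, 21, 5)

    int main(void)
    {
        /* TX bit (bit 0) set, T field (bits 21-25) = 5, so the full
         * VSX target register number is 32 + 5 = 37. */
        uint32_t opcode = (5u << 21) | 1u;
        printf("xT = %d\n", xT(opcode));  /* prints 37 */
        return 0;
    }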
+typedef union _ppc_vsr_t {
+    uint64_t u64[2];
+    uint32_t u32[4];
+    float32 f32[4];
+    float64 f64[2];
+} ppc_vsr_t;
+
+static void getVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
+{
+    if (n < 32) {
+        vsr->f64[0] = env->fpr[n];
+        vsr->u64[1] = env->vsr[n];
+    } else {
+        vsr->u64[0] = env->avr[n-32].u64[0];
+        vsr->u64[1] = env->avr[n-32].u64[1];
+    }
+}
+
+static void putVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
+{
+    if (n < 32) {
+        env->fpr[n] = vsr->f64[0];
+        env->vsr[n] = vsr->u64[1];
+    } else {
+        env->avr[n-32].u64[0] = vsr->u64[0];
+        env->avr[n-32].u64[1] = vsr->u64[1];
+    }
+}
+
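getVSR/putVSR implement the architected mapping of the 64 VSX registers onto existing state: VSRs 0-31 take doubleword 0 from the FPR array and doubleword 1 from the new vsr array, while VSRs 32-63 alias the Altivec registers. A toy model of the read side; the struct and field names here are illustrative, not QEMU's CPUPPCState:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct {
        double   fpr[32];     /* doubleword 0 of VSRs 0..31 */
        uint64_t vsr[32];     /* doubleword 1 of VSRs 0..31 */
        uint64_t avr[32][2];  /* VSRs 32..63 */
    } ToyEnv;

    static void get_vsr(const ToyEnv *env, int n, uint64_t out[2])
    {
        if (n < 32) {
            memcpy(&out[0], &env->fpr[n], 8);
            out[1] = env->vsr[n];
        } else {
            out[0] = env->avr[n - 32][0];
            out[1] = env->avr[n - 32][1];
        }
    }

    int main(void)
    {
        ToyEnv env = { 0 };
        env.fpr[3] = 1.5;     /* encodes as 0x3FF8000000000000 */
        uint64_t v[2];
        get_vsr(&env, 3, v);
        printf("VSR3 = %016llx %016llx\n",
               (unsigned long long)v[0], (unsigned long long)v[1]);
        return 0;
    }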
+#define float64_to_float64(x, env) x
+
+
+/* VSX_ADD_SUB - VSX floating point add/subtract
+ *   name  - instruction mnemonic
+ *   op    - operation (add or sub)
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   sfprf - set FPRF
+ */
+#define VSX_ADD_SUB(name, op, nels, tp, fld, sfprf, r2sp)                    \
+void helper_##name(CPUPPCState *env, uint32_t opcode)                        \
+{                                                                            \
+    ppc_vsr_t xt, xa, xb;                                                    \
+    int i;                                                                   \
+                                                                             \
+    getVSR(xA(opcode), &xa, env);                                            \
+    getVSR(xB(opcode), &xb, env);                                            \
+    getVSR(xT(opcode), &xt, env);                                            \
+    helper_reset_fpstatus(env);                                              \
+                                                                             \
+    for (i = 0; i < nels; i++) {                                             \
+        float_status tstat = env->fp_status;                                 \
+        set_float_exception_flags(0, &tstat);                                \
+        xt.fld[i] = tp##_##op(xa.fld[i], xb.fld[i], &tstat);                 \
+        env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+                                                                             \
+        if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {    \
+            if (tp##_is_infinity(xa.fld[i]) && tp##_is_infinity(xb.fld[i])) {\
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf);    \
+            } else if (tp##_is_signaling_nan(xa.fld[i]) ||                   \
+                       tp##_is_signaling_nan(xb.fld[i])) {                   \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf);   \
+            }                                                                \
+        }                                                                    \
+                                                                             \
+        if (r2sp) {                                                          \
+            xt.fld[i] = helper_frsp(env, xt.fld[i]);                         \
+        }                                                                    \
+                                                                             \
+        if (sfprf) {                                                         \
+            helper_compute_fprf(env, xt.fld[i], sfprf);                      \
+        }                                                                    \
+    }                                                                        \
+    putVSR(xT(opcode), &xt, env);                                            \
+    helper_float_check_status(env);                                          \
+}
+
+VSX_ADD_SUB(xsadddp, add, 1, float64, f64, 1, 0)
+VSX_ADD_SUB(xsaddsp, add, 1, float64, f64, 1, 1)
+VSX_ADD_SUB(xvadddp, add, 2, float64, f64, 0, 0)
+VSX_ADD_SUB(xvaddsp, add, 4, float32, f32, 0, 0)
+VSX_ADD_SUB(xssubdp, sub, 1, float64, f64, 1, 0)
+VSX_ADD_SUB(xssubsp, sub, 1, float64, f64, 1, 1)
+VSX_ADD_SUB(xvsubdp, sub, 2, float64, f64, 0, 0)
+VSX_ADD_SUB(xvsubsp, sub, 4, float32, f32, 0, 0)
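Each arithmetic macro runs the per-element operation with a scratch float_status (tstat) whose flags start at zero, then ORs the result back into env->fp_status: the environment flags stay sticky across elements while the code can still test which exception this particular element raised. A minimal imitation of that accumulate pattern, with a stand-in status type instead of softfloat's:

    #include <math.h>
    #include <stdio.h>

    /* Stand-in for float_status: a single "invalid" flag bit. */
    typedef struct { unsigned flags; } status_t;

    static double add_op(double a, double b, status_t *s)
    {
        if (isnan(a) || isnan(b)) {
            s->flags |= 1;            /* pretend float_flag_invalid */
        }
        return a + b;
    }

    int main(void)
    {
        status_t env_status = { 0 };
        double a[2] = { 1.0, nan("") }, b[2] = { 2.0, 1.0 }, t[2];

        for (int i = 0; i < 2; i++) {
            status_t tstat = env_status;
            tstat.flags = 0;                  /* fresh flags per element */
            t[i] = add_op(a[i], b[i], &tstat);
            env_status.flags |= tstat.flags;  /* accumulate sticky flags */
        }
        /* prints: t[0]=3 sticky flags=1 (set by the NaN element) */
        printf("t[0]=%g sticky flags=%u\n", t[0], env_status.flags);
        return 0;
    }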
+
+/* VSX_MUL - VSX floating point multiply
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   sfprf - set FPRF
+ */
+#define VSX_MUL(op, nels, tp, fld, sfprf, r2sp)                              \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                          \
+{                                                                            \
+    ppc_vsr_t xt, xa, xb;                                                    \
+    int i;                                                                   \
+                                                                             \
+    getVSR(xA(opcode), &xa, env);                                            \
+    getVSR(xB(opcode), &xb, env);                                            \
+    getVSR(xT(opcode), &xt, env);                                            \
+    helper_reset_fpstatus(env);                                              \
+                                                                             \
+    for (i = 0; i < nels; i++) {                                             \
+        float_status tstat = env->fp_status;                                 \
+        set_float_exception_flags(0, &tstat);                                \
+        xt.fld[i] = tp##_mul(xa.fld[i], xb.fld[i], &tstat);                  \
+        env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+                                                                             \
+        if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {    \
+            if ((tp##_is_infinity(xa.fld[i]) && tp##_is_zero(xb.fld[i])) ||  \
+                (tp##_is_infinity(xb.fld[i]) && tp##_is_zero(xa.fld[i]))) {  \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, sfprf);    \
+            } else if (tp##_is_signaling_nan(xa.fld[i]) ||                   \
+                       tp##_is_signaling_nan(xb.fld[i])) {                   \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf);   \
+            }                                                                \
+        }                                                                    \
+                                                                             \
+        if (r2sp) {                                                          \
+            xt.fld[i] = helper_frsp(env, xt.fld[i]);                         \
+        }                                                                    \
+                                                                             \
+        if (sfprf) {                                                         \
+            helper_compute_fprf(env, xt.fld[i], sfprf);                      \
+        }                                                                    \
+    }                                                                        \
+                                                                             \
+    putVSR(xT(opcode), &xt, env);                                            \
+    helper_float_check_status(env);                                          \
+}
+
+VSX_MUL(xsmuldp, 1, float64, f64, 1, 0)
+VSX_MUL(xsmulsp, 1, float64, f64, 1, 1)
+VSX_MUL(xvmuldp, 2, float64, f64, 0, 0)
+VSX_MUL(xvmulsp, 4, float32, f32, 0, 0)
+
+/* VSX_DIV - VSX floating point divide
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   sfprf - set FPRF
+ */
+#define VSX_DIV(op, nels, tp, fld, sfprf, r2sp)                               \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
+{                                                                             \
+    ppc_vsr_t xt, xa, xb;                                                     \
+    int i;                                                                    \
+                                                                              \
+    getVSR(xA(opcode), &xa, env);                                             \
+    getVSR(xB(opcode), &xb, env);                                             \
+    getVSR(xT(opcode), &xt, env);                                             \
+    helper_reset_fpstatus(env);                                               \
+                                                                              \
+    for (i = 0; i < nels; i++) {                                              \
+        float_status tstat = env->fp_status;                                  \
+        set_float_exception_flags(0, &tstat);                                 \
+        xt.fld[i] = tp##_div(xa.fld[i], xb.fld[i], &tstat);                   \
+        env->fp_status.float_exception_flags |= tstat.float_exception_flags;  \
+                                                                              \
+        if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {     \
+            if (tp##_is_infinity(xa.fld[i]) && tp##_is_infinity(xb.fld[i])) { \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, sfprf);     \
+            } else if (tp##_is_zero(xa.fld[i]) &&                             \
+                tp##_is_zero(xb.fld[i])) {                                    \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, sfprf);     \
+            } else if (tp##_is_signaling_nan(xa.fld[i]) ||                    \
+                tp##_is_signaling_nan(xb.fld[i])) {                           \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf);    \
+            }                                                                 \
+        }                                                                     \
+                                                                              \
+        if (r2sp) {                                                           \
+            xt.fld[i] = helper_frsp(env, xt.fld[i]);                          \
+        }                                                                     \
+                                                                              \
+        if (sfprf) {                                                          \
+            helper_compute_fprf(env, xt.fld[i], sfprf);                       \
+        }                                                                     \
+    }                                                                         \
+                                                                              \
+    putVSR(xT(opcode), &xt, env);                                             \
+    helper_float_check_status(env);                                           \
+}
+
+VSX_DIV(xsdivdp, 1, float64, f64, 1, 0)
+VSX_DIV(xsdivsp, 1, float64, f64, 1, 1)
+VSX_DIV(xvdivdp, 2, float64, f64, 0, 0)
+VSX_DIV(xvdivsp, 4, float32, f32, 0, 0)
+
+/* VSX_RE  - VSX floating point reciprocal estimate
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   sfprf - set FPRF
+ */
+#define VSX_RE(op, nels, tp, fld, sfprf, r2sp)                                \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
+{                                                                             \
+    ppc_vsr_t xt, xb;                                                         \
+    int i;                                                                    \
+                                                                              \
+    getVSR(xB(opcode), &xb, env);                                             \
+    getVSR(xT(opcode), &xt, env);                                             \
+    helper_reset_fpstatus(env);                                               \
+                                                                              \
+    for (i = 0; i < nels; i++) {                                              \
+        if (unlikely(tp##_is_signaling_nan(xb.fld[i]))) {                     \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf);    \
+        }                                                                     \
+        xt.fld[i] = tp##_div(tp##_one, xb.fld[i], &env->fp_status);           \
+                                                                              \
+        if (r2sp) {                                                           \
+            xt.fld[i] = helper_frsp(env, xt.fld[i]);                          \
+        }                                                                     \
+                                                                              \
+        if (sfprf) {                                                          \
+            helper_compute_fprf(env, xt.fld[0], sfprf);                       \
+        }                                                                     \
+    }                                                                         \
+                                                                              \
+    putVSR(xT(opcode), &xt, env);                                             \
+    helper_float_check_status(env);                                           \
+}
+
+VSX_RE(xsredp, 1, float64, f64, 1, 0)
+VSX_RE(xsresp, 1, float64, f64, 1, 1)
+VSX_RE(xvredp, 2, float64, f64, 0, 0)
+VSX_RE(xvresp, 4, float32, f32, 0, 0)
+
+/* VSX_SQRT - VSX floating point square root
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   sfprf - set FPRF
+ */
+#define VSX_SQRT(op, nels, tp, fld, sfprf, r2sp)                             \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                          \
+{                                                                            \
+    ppc_vsr_t xt, xb;                                                        \
+    int i;                                                                   \
+                                                                             \
+    getVSR(xB(opcode), &xb, env);                                            \
+    getVSR(xT(opcode), &xt, env);                                            \
+    helper_reset_fpstatus(env);                                              \
+                                                                             \
+    for (i = 0; i < nels; i++) {                                             \
+        float_status tstat = env->fp_status;                                 \
+        set_float_exception_flags(0, &tstat);                                \
+        xt.fld[i] = tp##_sqrt(xb.fld[i], &tstat);                            \
+        env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+                                                                             \
+        if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {    \
+            if (tp##_is_neg(xb.fld[i]) && !tp##_is_zero(xb.fld[i])) {        \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf);   \
+            } else if (tp##_is_signaling_nan(xb.fld[i])) {                   \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf);   \
+            }                                                                \
+        }                                                                    \
+                                                                             \
+        if (r2sp) {                                                          \
+            xt.fld[i] = helper_frsp(env, xt.fld[i]);                         \
+        }                                                                    \
+                                                                             \
+        if (sfprf) {                                                         \
+            helper_compute_fprf(env, xt.fld[i], sfprf);                      \
+        }                                                                    \
+    }                                                                        \
+                                                                             \
+    putVSR(xT(opcode), &xt, env);                                            \
+    helper_float_check_status(env);                                          \
+}
+
+VSX_SQRT(xssqrtdp, 1, float64, f64, 1, 0)
+VSX_SQRT(xssqrtsp, 1, float64, f64, 1, 1)
+VSX_SQRT(xvsqrtdp, 2, float64, f64, 0, 0)
+VSX_SQRT(xvsqrtsp, 4, float32, f32, 0, 0)
+
+/* VSX_RSQRTE - VSX floating point reciprocal square root estimate
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   sfprf - set FPRF
+ */
+#define VSX_RSQRTE(op, nels, tp, fld, sfprf, r2sp)                           \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                          \
+{                                                                            \
+    ppc_vsr_t xt, xb;                                                        \
+    int i;                                                                   \
+                                                                             \
+    getVSR(xB(opcode), &xb, env);                                            \
+    getVSR(xT(opcode), &xt, env);                                            \
+    helper_reset_fpstatus(env);                                              \
+                                                                             \
+    for (i = 0; i < nels; i++) {                                             \
+        float_status tstat = env->fp_status;                                 \
+        set_float_exception_flags(0, &tstat);                                \
+        xt.fld[i] = tp##_sqrt(xb.fld[i], &tstat);                            \
+        xt.fld[i] = tp##_div(tp##_one, xt.fld[i], &tstat);                   \
+        env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+                                                                             \
+        if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {    \
+            if (tp##_is_neg(xb.fld[i]) && !tp##_is_zero(xb.fld[i])) {        \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf);   \
+            } else if (tp##_is_signaling_nan(xb.fld[i])) {                   \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf);   \
+            }                                                                \
+        }                                                                    \
+                                                                             \
+        if (r2sp) {                                                          \
+            xt.fld[i] = helper_frsp(env, xt.fld[i]);                         \
+        }                                                                    \
+                                                                             \
+        if (sfprf) {                                                         \
+            helper_compute_fprf(env, xt.fld[i], sfprf);                      \
+        }                                                                    \
+    }                                                                        \
+                                                                             \
+    putVSR(xT(opcode), &xt, env);                                            \
+    helper_float_check_status(env);                                          \
+}
+
+VSX_RSQRTE(xsrsqrtedp, 1, float64, f64, 1, 0)
+VSX_RSQRTE(xsrsqrtesp, 1, float64, f64, 1, 1)
+VSX_RSQRTE(xvrsqrtedp, 2, float64, f64, 0, 0)
+VSX_RSQRTE(xvrsqrtesp, 4, float32, f32, 0, 0)
+
+/* VSX_TDIV - VSX floating point test for divide
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   emin  - minimum unbiased exponent
+ *   emax  - maximum unbiased exponent
+ *   nbits - number of fraction bits
+ */
+#define VSX_TDIV(op, nels, tp, fld, emin, emax, nbits)                  \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
+{                                                                       \
+    ppc_vsr_t xa, xb;                                                   \
+    int i;                                                              \
+    int fe_flag = 0;                                                    \
+    int fg_flag = 0;                                                    \
+                                                                        \
+    getVSR(xA(opcode), &xa, env);                                       \
+    getVSR(xB(opcode), &xb, env);                                       \
+                                                                        \
+    for (i = 0; i < nels; i++) {                                        \
+        if (unlikely(tp##_is_infinity(xa.fld[i]) ||                     \
+                     tp##_is_infinity(xb.fld[i]) ||                     \
+                     tp##_is_zero(xb.fld[i]))) {                        \
+            fe_flag = 1;                                                \
+            fg_flag = 1;                                                \
+        } else {                                                        \
+            int e_a = ppc_##tp##_get_unbiased_exp(xa.fld[i]);           \
+            int e_b = ppc_##tp##_get_unbiased_exp(xb.fld[i]);           \
+                                                                        \
+            if (unlikely(tp##_is_any_nan(xa.fld[i]) ||                  \
+                         tp##_is_any_nan(xb.fld[i]))) {                 \
+                fe_flag = 1;                                            \
+            } else if ((e_b <= emin) || (e_b >= (emax-2))) {            \
+                fe_flag = 1;                                            \
+            } else if (!tp##_is_zero(xa.fld[i]) &&                      \
+                       (((e_a - e_b) >= emax) ||                        \
+                        ((e_a - e_b) <= (emin+1)) ||                    \
+                         (e_a <= (emin+nbits)))) {                      \
+                fe_flag = 1;                                            \
+            }                                                           \
+                                                                        \
+            if (unlikely(tp##_is_zero_or_denormal(xb.fld[i]))) {        \
+                /* XB is not zero because of the above check and */     \
+                /* so must be denormalized.                      */     \
+                fg_flag = 1;                                            \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+}
+
+VSX_TDIV(xstdivdp, 1, float64, f64, -1022, 1023, 52)
+VSX_TDIV(xvtdivdp, 2, float64, f64, -1022, 1023, 52)
+VSX_TDIV(xvtdivsp, 4, float32, f32, -126, 127, 23)
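+
+/* Worked example: with neither flag raised, CR[BF] gets 0b1000; a zero
+ * divisor raises both fe and fg, giving 0b1110.
+ */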
+
+/* VSX_TSQRT - VSX floating point test for square root
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   emin  - minimum unbiased exponent
+ *   nbits - number of fraction bits
+ */
+#define VSX_TSQRT(op, nels, tp, fld, emin, nbits)                       \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
+{                                                                       \
+    ppc_vsr_t xa, xb;                                                   \
+    int i;                                                              \
+    int fe_flag = 0;                                                    \
+    int fg_flag = 0;                                                    \
+                                                                        \
+    getVSR(xA(opcode), &xa, env);                                       \
+    getVSR(xB(opcode), &xb, env);                                       \
+                                                                        \
+    for (i = 0; i < nels; i++) {                                        \
+        if (unlikely(tp##_is_infinity(xb.fld[i]) ||                     \
+                     tp##_is_zero(xb.fld[i]))) {                        \
+            fe_flag = 1;                                                \
+            fg_flag = 1;                                                \
+        } else {                                                        \
+            int e_b = ppc_##tp##_get_unbiased_exp(xb.fld[i]);           \
+                                                                        \
+            if (unlikely(tp##_is_any_nan(xb.fld[i]))) {                 \
+                fe_flag = 1;                                            \
+            } else if (unlikely(tp##_is_neg(xb.fld[i]))) {              \
+                fe_flag = 1;                                            \
+            } else if (e_b <= (emin+nbits)) {                           \
+                fe_flag = 1;                                            \
+            }                                                           \
+                                                                        \
+            if (unlikely(tp##_is_zero_or_denormal(xb.fld[i]))) {        \
+                /* XB is not zero because of the above check and */     \
+                /* therefore must be denormalized.               */     \
+                fg_flag = 1;                                            \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+}
+
+VSX_TSQRT(xstsqrtdp, 1, float64, f64, -1022, 52)
+VSX_TSQRT(xvtsqrtdp, 2, float64, f64, -1022, 52)
+VSX_TSQRT(xvtsqrtsp, 4, float32, f32, -126, 23)
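+
+/* Here fe is raised for zero, NaN, negative or tiny-exponent inputs,
+ * and fg for zero or denormal inputs; CR[BF] is encoded as for the
+ * tdiv tests above.
+ */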
+
+/* VSX_MADD - VSX floating point multiply/add variations
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   maddflgs - flags for the float*muladd routine that control the
+ *           various forms (madd, msub, nmadd, nmsub)
+ *   afrm  - A form (1=A, 0=M)
+ *   sfprf - set FPRF
+ *   r2sp  - round intermediate double-precision result to single precision
+ */
+#define VSX_MADD(op, nels, tp, fld, maddflgs, afrm, sfprf, r2sp)              \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
+{                                                                             \
+    ppc_vsr_t xt_in, xa, xb, xt_out;                                          \
+    ppc_vsr_t *b, *c;                                                         \
+    int i;                                                                    \
+                                                                              \
+    if (afrm) { /* AxB + T */                                                 \
+        b = &xb;                                                              \
+        c = &xt_in;                                                           \
+    } else { /* AxT + B */                                                    \
+        b = &xt_in;                                                           \
+        c = &xb;                                                              \
+    }                                                                         \
+                                                                              \
+    getVSR(xA(opcode), &xa, env);                                             \
+    getVSR(xB(opcode), &xb, env);                                             \
+    getVSR(xT(opcode), &xt_in, env);                                          \
+                                                                              \
+    xt_out = xt_in;                                                           \
+                                                                              \
+    helper_reset_fpstatus(env);                                               \
+                                                                              \
+    for (i = 0; i < nels; i++) {                                              \
+        float_status tstat = env->fp_status;                                  \
+        set_float_exception_flags(0, &tstat);                                 \
+        if (r2sp && (tstat.float_rounding_mode == float_round_nearest_even)) {\
+            /* Avoid double rounding errors by rounding the intermediate */   \
+            /* result to odd.                                            */   \
+            set_float_rounding_mode(float_round_to_zero, &tstat);             \
+            xt_out.fld[i] = tp##_muladd(xa.fld[i], b->fld[i], c->fld[i],      \
+                                       maddflgs, &tstat);                     \
+            xt_out.fld[i] |= (get_float_exception_flags(&tstat) &             \
+                              float_flag_inexact) != 0;                       \
+        } else {                                                              \
+            xt_out.fld[i] = tp##_muladd(xa.fld[i], b->fld[i], c->fld[i],      \
+                                        maddflgs, &tstat);                    \
+        }                                                                     \
+        env->fp_status.float_exception_flags |= tstat.float_exception_flags;  \
+                                                                              \
+        if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {     \
+            if (tp##_is_signaling_nan(xa.fld[i]) ||                           \
+                tp##_is_signaling_nan(b->fld[i]) ||                           \
+                tp##_is_signaling_nan(c->fld[i])) {                           \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf);    \
+                tstat.float_exception_flags &= ~float_flag_invalid;           \
+            }                                                                 \
+            if ((tp##_is_infinity(xa.fld[i]) && tp##_is_zero(b->fld[i])) ||   \
+                (tp##_is_zero(xa.fld[i]) && tp##_is_infinity(b->fld[i]))) {   \
+                xt_out.fld[i] = float64_to_##tp(fload_invalid_op_excp(env,    \
+                    POWERPC_EXCP_FP_VXIMZ, sfprf), &env->fp_status);          \
+                tstat.float_exception_flags &= ~float_flag_invalid;           \
+            }                                                                 \
+            if ((tstat.float_exception_flags & float_flag_invalid) &&         \
+                ((tp##_is_infinity(xa.fld[i]) ||                              \
+                  tp##_is_infinity(b->fld[i])) &&                             \
+                  tp##_is_infinity(c->fld[i]))) {                             \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf);     \
+            }                                                                 \
+        }                                                                     \
+                                                                              \
+        if (r2sp) {                                                           \
+            xt_out.fld[i] = helper_frsp(env, xt_out.fld[i]);                  \
+        }                                                                     \
+                                                                              \
+        if (sfprf) {                                                          \
+            helper_compute_fprf(env, xt_out.fld[i], sfprf);                   \
+        }                                                                     \
+    }                                                                         \
+    putVSR(xT(opcode), &xt_out, env);                                         \
+    helper_float_check_status(env);                                           \
+}
+
+#define MADD_FLGS 0
+#define MSUB_FLGS float_muladd_negate_c
+#define NMADD_FLGS float_muladd_negate_result
+#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
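+
+/* For example, MSUB_FLGS asks softfloat's muladd for a*b + (-c), i.e. a
+ * multiply-subtract, while NMSUB_FLGS additionally negates the final
+ * result, giving -(a*b - c).
+ */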
+
+VSX_MADD(xsmaddadp, 1, float64, f64, MADD_FLGS, 1, 1, 0)
+VSX_MADD(xsmaddmdp, 1, float64, f64, MADD_FLGS, 0, 1, 0)
+VSX_MADD(xsmsubadp, 1, float64, f64, MSUB_FLGS, 1, 1, 0)
+VSX_MADD(xsmsubmdp, 1, float64, f64, MSUB_FLGS, 0, 1, 0)
+VSX_MADD(xsnmaddadp, 1, float64, f64, NMADD_FLGS, 1, 1, 0)
+VSX_MADD(xsnmaddmdp, 1, float64, f64, NMADD_FLGS, 0, 1, 0)
+VSX_MADD(xsnmsubadp, 1, float64, f64, NMSUB_FLGS, 1, 1, 0)
+VSX_MADD(xsnmsubmdp, 1, float64, f64, NMSUB_FLGS, 0, 1, 0)
+
+VSX_MADD(xsmaddasp, 1, float64, f64, MADD_FLGS, 1, 1, 1)
+VSX_MADD(xsmaddmsp, 1, float64, f64, MADD_FLGS, 0, 1, 1)
+VSX_MADD(xsmsubasp, 1, float64, f64, MSUB_FLGS, 1, 1, 1)
+VSX_MADD(xsmsubmsp, 1, float64, f64, MSUB_FLGS, 0, 1, 1)
+VSX_MADD(xsnmaddasp, 1, float64, f64, NMADD_FLGS, 1, 1, 1)
+VSX_MADD(xsnmaddmsp, 1, float64, f64, NMADD_FLGS, 0, 1, 1)
+VSX_MADD(xsnmsubasp, 1, float64, f64, NMSUB_FLGS, 1, 1, 1)
+VSX_MADD(xsnmsubmsp, 1, float64, f64, NMSUB_FLGS, 0, 1, 1)
+
+VSX_MADD(xvmaddadp, 2, float64, f64, MADD_FLGS, 1, 0, 0)
+VSX_MADD(xvmaddmdp, 2, float64, f64, MADD_FLGS, 0, 0, 0)
+VSX_MADD(xvmsubadp, 2, float64, f64, MSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvmsubmdp, 2, float64, f64, MSUB_FLGS, 0, 0, 0)
+VSX_MADD(xvnmaddadp, 2, float64, f64, NMADD_FLGS, 1, 0, 0)
+VSX_MADD(xvnmaddmdp, 2, float64, f64, NMADD_FLGS, 0, 0, 0)
+VSX_MADD(xvnmsubadp, 2, float64, f64, NMSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvnmsubmdp, 2, float64, f64, NMSUB_FLGS, 0, 0, 0)
+
+VSX_MADD(xvmaddasp, 4, float32, f32, MADD_FLGS, 1, 0, 0)
+VSX_MADD(xvmaddmsp, 4, float32, f32, MADD_FLGS, 0, 0, 0)
+VSX_MADD(xvmsubasp, 4, float32, f32, MSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvmsubmsp, 4, float32, f32, MSUB_FLGS, 0, 0, 0)
+VSX_MADD(xvnmaddasp, 4, float32, f32, NMADD_FLGS, 1, 0, 0)
+VSX_MADD(xvnmaddmsp, 4, float32, f32, NMADD_FLGS, 0, 0, 0)
+VSX_MADD(xvnmsubasp, 4, float32, f32, NMSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvnmsubmsp, 4, float32, f32, NMSUB_FLGS, 0, 0, 0)
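+
+/* Naming: the "a" forms (afrm=1) compute T = A*B + T, the "m" forms
+ * (afrm=0) compute T = A*T + B, per the b/c pointer swap at the top of
+ * VSX_MADD.
+ */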
+
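+/* VSX_SCALAR_CMP - VSX scalar floating point compare
+ *   op      - instruction mnemonic
+ *   ordered - 1 = ordered compare (any NaN also raises VXVC), 0 = unordered
+ */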
+#define VSX_SCALAR_CMP(op, ordered)                                      \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                      \
+{                                                                        \
+    ppc_vsr_t xa, xb;                                                    \
+    uint32_t cc = 0;                                                     \
+                                                                         \
+    getVSR(xA(opcode), &xa, env);                                        \
+    getVSR(xB(opcode), &xb, env);                                        \
+                                                                         \
+    if (unlikely(float64_is_any_nan(xa.f64[0]) ||                        \
+                 float64_is_any_nan(xb.f64[0]))) {                       \
+        if (float64_is_signaling_nan(xa.f64[0]) ||                       \
+            float64_is_signaling_nan(xb.f64[0])) {                       \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);       \
+        }                                                                \
+        if (ordered) {                                                   \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0);         \
+        }                                                                \
+        cc = 1;                                                          \
+    } else {                                                             \
+        if (float64_lt(xa.f64[0], xb.f64[0], &env->fp_status)) {         \
+            cc = 8;                                                      \
+        } else if (!float64_le(xa.f64[0], xb.f64[0], &env->fp_status)) { \
+            cc = 4;                                                      \
+        } else {                                                         \
+            cc = 2;                                                      \
+        }                                                                \
+    }                                                                    \
+                                                                         \
+    env->fpscr &= ~(0x0F << FPSCR_FPRF);                                 \
+    env->fpscr |= cc << FPSCR_FPRF;                                      \
+    env->crf[BF(opcode)] = cc;                                           \
+                                                                         \
+    helper_float_check_status(env);                                      \
+}
+
+VSX_SCALAR_CMP(xscmpodp, 1)
+VSX_SCALAR_CMP(xscmpudp, 0)
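+
+/* cc follows the FPCC encoding: 8 = less than, 4 = greater than,
+ * 2 = equal, 1 = unordered; it is written to both FPSCR[FPRF] and
+ * CR[BF].
+ */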
+
+#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL)
+#define float32_snan_to_qnan(x) ((x) | 0x00400000)
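+
+/* OR-ing in the most significant fraction bit converts a signaling NaN
+ * into the corresponding quiet NaN while preserving its payload.
+ */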
+
+/* VSX_MAX_MIN - VSX floating point maximum/minimum
+ *   name  - instruction mnemonic
+ *   op    - operation (maxnum or minnum)
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ */
+#define VSX_MAX_MIN(name, op, nels, tp, fld)                                  \
+void helper_##name(CPUPPCState *env, uint32_t opcode)                         \
+{                                                                             \
+    ppc_vsr_t xt, xa, xb;                                                     \
+    int i;                                                                    \
+                                                                              \
+    getVSR(xA(opcode), &xa, env);                                             \
+    getVSR(xB(opcode), &xb, env);                                             \
+    getVSR(xT(opcode), &xt, env);                                             \
+                                                                              \
+    for (i = 0; i < nels; i++) {                                              \
+        xt.fld[i] = tp##_##op(xa.fld[i], xb.fld[i], &env->fp_status);         \
+        if (unlikely(tp##_is_signaling_nan(xa.fld[i]) ||                      \
+                     tp##_is_signaling_nan(xb.fld[i]))) {                     \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);            \
+        }                                                                     \
+    }                                                                         \
+                                                                              \
+    putVSR(xT(opcode), &xt, env);                                             \
+    helper_float_check_status(env);                                           \
+}
+
+VSX_MAX_MIN(xsmaxdp, maxnum, 1, float64, f64)
+VSX_MAX_MIN(xvmaxdp, maxnum, 2, float64, f64)
+VSX_MAX_MIN(xvmaxsp, maxnum, 4, float32, f32)
+VSX_MAX_MIN(xsmindp, minnum, 1, float64, f64)
+VSX_MAX_MIN(xvmindp, minnum, 2, float64, f64)
+VSX_MAX_MIN(xvminsp, minnum, 4, float32, f32)
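+
+/* The maxnum/minnum result is kept even when an input is a signaling
+ * NaN; the SNaN only raises VXSNAN after the fact.
+ */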
+
+/* VSX_CMP - VSX floating point compare
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   cmp   - comparison operation
+ *   svxvc - set VXVC bit
+ */
+#define VSX_CMP(op, nels, tp, fld, cmp, svxvc)                            \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                       \
+{                                                                         \
+    ppc_vsr_t xt, xa, xb;                                                 \
+    int i;                                                                \
+    int all_true = 1;                                                     \
+    int all_false = 1;                                                    \
+                                                                          \
+    getVSR(xA(opcode), &xa, env);                                         \
+    getVSR(xB(opcode), &xb, env);                                         \
+    getVSR(xT(opcode), &xt, env);                                         \
+                                                                          \
+    for (i = 0; i < nels; i++) {                                          \
+        if (unlikely(tp##_is_any_nan(xa.fld[i]) ||                        \
+                     tp##_is_any_nan(xb.fld[i]))) {                       \
+            if (tp##_is_signaling_nan(xa.fld[i]) ||                       \
+                tp##_is_signaling_nan(xb.fld[i])) {                       \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);    \
+            }                                                             \
+            if (svxvc) {                                                  \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0);      \
+            }                                                             \
+            xt.fld[i] = 0;                                                \
+            all_true = 0;                                                 \
+        } else {                                                          \
+            if (tp##_##cmp(xb.fld[i], xa.fld[i], &env->fp_status) == 1) { \
+                xt.fld[i] = -1;                                           \
+                all_false = 0;                                            \
+            } else {                                                      \
+                xt.fld[i] = 0;                                            \
+                all_true = 0;                                             \
+            }                                                             \
+        }                                                                 \
+    }                                                                     \
+                                                                          \
+    putVSR(xT(opcode), &xt, env);                                         \
+    if ((opcode >> (31-21)) & 1) {                                        \
+        env->crf[6] = (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0);       \
+    }                                                                     \
+    helper_float_check_status(env);                                       \
+}
+
+VSX_CMP(xvcmpeqdp, 2, float64, f64, eq, 0)
+VSX_CMP(xvcmpgedp, 2, float64, f64, le, 1)
+VSX_CMP(xvcmpgtdp, 2, float64, f64, lt, 1)
+VSX_CMP(xvcmpeqsp, 4, float32, f32, eq, 0)
+VSX_CMP(xvcmpgesp, 4, float32, f32, le, 1)
+VSX_CMP(xvcmpgtsp, 4, float32, f32, lt, 1)
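+
+/* ge/gt are implemented via the softfloat le/lt primitives with the
+ * operands swapped (a >= b iff b <= a).  The record (Rc) bit, tested as
+ * instruction bit 21 above, gates the CR6 all-true/all-false summary.
+ */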
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define JOFFSET 0
+#else
+#define JOFFSET 1
+#endif
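+
+/* JOFFSET picks the most significant 32-bit word of each doubleword
+ * regardless of host byte order: f32[2*i + JOFFSET] is that word of
+ * doubleword i.
+ */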
+
+/* VSX_CVT_FP_TO_FP - VSX floating point/floating point conversion
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   stp   - source type (float32 or float64)
+ *   ttp   - target type (float32 or float64)
+ *   sfld  - source vsr_t field (f32 or f64)
+ *   tfld  - target vsr_t field (f32 or f64)
+ *   sfprf - set FPRF
+ */
+#define VSX_CVT_FP_TO_FP(op, nels, stp, ttp, sfld, tfld, sfprf)    \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                \
+{                                                                  \
+    ppc_vsr_t xt, xb;                                              \
+    int i;                                                         \
+                                                                   \
+    getVSR(xB(opcode), &xb, env);                                  \
+    getVSR(xT(opcode), &xt, env);                                  \
+                                                                   \
+    for (i = 0; i < nels; i++) {                                   \
+        int j = 2*i + JOFFSET;                                     \
+        xt.tfld = stp##_to_##ttp(xb.sfld, &env->fp_status);        \
+        if (unlikely(stp##_is_signaling_nan(xb.sfld))) {           \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+            xt.tfld = ttp##_snan_to_qnan(xt.tfld);                 \
+        }                                                          \
+        if (sfprf) {                                               \
+            helper_compute_fprf(env, ttp##_to_float64(xt.tfld,     \
+                                &env->fp_status), sfprf);          \
+        }                                                          \
+    }                                                              \
+                                                                   \
+    putVSR(xT(opcode), &xt, env);                                  \
+    helper_float_check_status(env);                                \
+}
+
+VSX_CVT_FP_TO_FP(xscvdpsp, 1, float64, float32, f64[i], f32[j], 1)
+VSX_CVT_FP_TO_FP(xscvspdp, 1, float32, float64, f32[j], f64[i], 1)
+VSX_CVT_FP_TO_FP(xvcvdpsp, 2, float64, float32, f64[i], f32[j], 0)
+VSX_CVT_FP_TO_FP(xvcvspdp, 2, float32, float64, f32[j], f64[i], 0)
+
+uint64_t helper_xscvdpspn(CPUPPCState *env, uint64_t xb)
+{
+    float_status tstat = env->fp_status;
+    set_float_exception_flags(0, &tstat);
+
+    return (uint64_t)float64_to_float32(xb, &tstat) << 32;
+}
+
+uint64_t helper_xscvspdpn(CPUPPCState *env, uint64_t xb)
+{
+    float_status tstat = env->fp_status;
+    set_float_exception_flags(0, &tstat);
+
+    return float32_to_float64(xb >> 32, &tstat);
+}
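+
+/* The non-signalling ("n") conversions above run on a scratch
+ * float_status and never merge its flags back, so they leave the FPSCR
+ * exception bits untouched.
+ */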
+
+/* VSX_CVT_FP_TO_INT - VSX floating point to integer conversion
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   stp   - source type (float32 or float64)
+ *   ttp   - target type (int32, uint32, int64 or uint64)
+ *   sfld  - source vsr_t field
+ *   tfld  - target vsr_t field
+ *   jdef  - definition of the j index (i or 2*i + JOFFSET)
+ *   rnan  - resulting NaN
+ */
+#define VSX_CVT_FP_TO_INT(op, nels, stp, ttp, sfld, tfld, jdef, rnan)        \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                          \
+{                                                                            \
+    ppc_vsr_t xt, xb;                                                        \
+    int i;                                                                   \
+                                                                             \
+    getVSR(xB(opcode), &xb, env);                                            \
+    getVSR(xT(opcode), &xt, env);                                            \
+                                                                             \
+    for (i = 0; i < nels; i++) {                                             \
+        int j = jdef;                                                        \
+        if (unlikely(stp##_is_any_nan(xb.sfld))) {                           \
+            if (stp##_is_signaling_nan(xb.sfld)) {                           \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);       \
+            }                                                                \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0);            \
+            xt.tfld = rnan;                                                  \
+        } else {                                                             \
+            xt.tfld = stp##_to_##ttp(xb.sfld, &env->fp_status);              \
+            if (env->fp_status.float_exception_flags & float_flag_invalid) { \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0);        \
+            }                                                                \
+        }                                                                    \
+    }                                                                        \
+                                                                             \
+    putVSR(xT(opcode), &xt, env);                                            \
+    helper_float_check_status(env);                                          \
+}
+
+VSX_CVT_FP_TO_INT(xscvdpsxds, 1, float64, int64, f64[j], u64[i], i, \
+                  0x8000000000000000ULL)
+VSX_CVT_FP_TO_INT(xscvdpsxws, 1, float64, int32, f64[i], u32[j], \
+                  2*i + JOFFSET, 0x80000000U)
+VSX_CVT_FP_TO_INT(xscvdpuxds, 1, float64, uint64, f64[j], u64[i], i, 0ULL)
+VSX_CVT_FP_TO_INT(xscvdpuxws, 1, float64, uint32, f64[i], u32[j], \
+                  2*i + JOFFSET, 0U)
+VSX_CVT_FP_TO_INT(xvcvdpsxds, 2, float64, int64, f64[j], u64[i], i, \
+                  0x8000000000000000ULL)
+VSX_CVT_FP_TO_INT(xvcvdpsxws, 2, float64, int32, f64[i], u32[j], \
+                  2*i + JOFFSET, 0x80000000U)
+VSX_CVT_FP_TO_INT(xvcvdpuxds, 2, float64, uint64, f64[j], u64[i], i, 0ULL)
+VSX_CVT_FP_TO_INT(xvcvdpuxws, 2, float64, uint32, f64[i], u32[j], \
+                  2*i + JOFFSET, 0U)
+VSX_CVT_FP_TO_INT(xvcvspsxds, 2, float32, int64, f32[j], u64[i], \
+                  2*i + JOFFSET, 0x8000000000000000ULL)
+VSX_CVT_FP_TO_INT(xvcvspsxws, 4, float32, int32, f32[j], u32[j], i, \
+                  0x80000000U)
+VSX_CVT_FP_TO_INT(xvcvspuxds, 2, float32, uint64, f32[j], u64[i], \
+                  2*i + JOFFSET, 0ULL)
+VSX_CVT_FP_TO_INT(xvcvspuxws, 4, float32, uint32, f32[j], u32[i], i, 0U)
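+
+/* rnan is the value delivered for a NaN input: the most negative
+ * representable integer for the signed conversions, zero for the
+ * unsigned ones.
+ */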
+
+/* VSX_CVT_INT_TO_FP - VSX integer to floating point conversion
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   stp   - source type (int32, uint32, int64 or uint64)
+ *   ttp   - target type (float32 or float64)
+ *   sfld  - source vsr_t field
+ *   tfld  - target vsr_t field
+ *   jdef  - definition of the j index (i or 2*i + JOFFSET)
+ *   sfprf - set FPRF
+ *   r2sp  - round intermediate double-precision result to single precision
+ */
+#define VSX_CVT_INT_TO_FP(op, nels, stp, ttp, sfld, tfld, jdef, sfprf, r2sp) \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
+{                                                                       \
+    ppc_vsr_t xt, xb;                                                   \
+    int i;                                                              \
+                                                                        \
+    getVSR(xB(opcode), &xb, env);                                       \
+    getVSR(xT(opcode), &xt, env);                                       \
+                                                                        \
+    for (i = 0; i < nels; i++) {                                        \
+        int j = jdef;                                                   \
+        xt.tfld = stp##_to_##ttp(xb.sfld, &env->fp_status);             \
+        if (r2sp) {                                                     \
+            xt.tfld = helper_frsp(env, xt.tfld);                        \
+        }                                                               \
+        if (sfprf) {                                                    \
+            helper_compute_fprf(env, xt.tfld, sfprf);                   \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    putVSR(xT(opcode), &xt, env);                                       \
+    helper_float_check_status(env);                                     \
+}
+
+VSX_CVT_INT_TO_FP(xscvsxddp, 1, int64, float64, u64[j], f64[i], i, 1, 0)
+VSX_CVT_INT_TO_FP(xscvuxddp, 1, uint64, float64, u64[j], f64[i], i, 1, 0)
+VSX_CVT_INT_TO_FP(xscvsxdsp, 1, int64, float64, u64[j], f64[i], i, 1, 1)
+VSX_CVT_INT_TO_FP(xscvuxdsp, 1, uint64, float64, u64[j], f64[i], i, 1, 1)
+VSX_CVT_INT_TO_FP(xvcvsxddp, 2, int64, float64, u64[j], f64[i], i, 0, 0)
+VSX_CVT_INT_TO_FP(xvcvuxddp, 2, uint64, float64, u64[j], f64[i], i, 0, 0)
+VSX_CVT_INT_TO_FP(xvcvsxwdp, 2, int32, float64, u32[j], f64[i], \
+                  2*i + JOFFSET, 0, 0)
+VSX_CVT_INT_TO_FP(xvcvuxwdp, 2, uint64, float64, u32[j], f64[i], \
+                  2*i + JOFFSET, 0, 0)
+VSX_CVT_INT_TO_FP(xvcvsxdsp, 2, int64, float32, u64[i], f32[j], \
+                  2*i + JOFFSET, 0, 0)
+VSX_CVT_INT_TO_FP(xvcvuxdsp, 2, uint64, float32, u64[i], f32[j], \
+                  2*i + JOFFSET, 0, 0)
+VSX_CVT_INT_TO_FP(xvcvsxwsp, 4, int32, float32, u32[j], f32[i], i, 0, 0)
+VSX_CVT_INT_TO_FP(xvcvuxwsp, 4, uint32, float32, u32[j], f32[i], i, 0, 0)
+
+/* For "use current rounding mode", define a value that will not be one of
+ * the existing rounding mode enums.
+ */
+#define FLOAT_ROUND_CURRENT (float_round_nearest_even + float_round_down + \
+  float_round_up + float_round_to_zero)
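+/* With softfloat's enum values at the time of this patch (0, 1, 2, 3),
+ * the sum is 6, which matches none of them.
+ */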
+
+/* VSX_ROUND - VSX floating point round
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   rmode - rounding mode
+ *   sfprf - set FPRF
+ */
+#define VSX_ROUND(op, nels, tp, fld, rmode, sfprf)                     \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                    \
+{                                                                      \
+    ppc_vsr_t xt, xb;                                                  \
+    int i;                                                             \
+    getVSR(xB(opcode), &xb, env);                                      \
+    getVSR(xT(opcode), &xt, env);                                      \
+                                                                       \
+    if (rmode != FLOAT_ROUND_CURRENT) {                                \
+        set_float_rounding_mode(rmode, &env->fp_status);               \
+    }                                                                  \
+                                                                       \
+    for (i = 0; i < nels; i++) {                                       \
+        if (unlikely(tp##_is_signaling_nan(xb.fld[i]))) {              \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);     \
+            xt.fld[i] = tp##_snan_to_qnan(xb.fld[i]);                  \
+        } else {                                                       \
+            xt.fld[i] = tp##_round_to_int(xb.fld[i], &env->fp_status); \
+        }                                                              \
+        if (sfprf) {                                                   \
+            helper_compute_fprf(env, xt.fld[i], sfprf);                \
+        }                                                              \
+    }                                                                  \
+                                                                       \
+    /* If this is not a "use current rounding mode" instruction,       \
+     * then inhibit setting of the XX bit and restore rounding         \
+     * mode from FPSCR */                                              \
+    if (rmode != FLOAT_ROUND_CURRENT) {                                \
+        fpscr_set_rounding_mode(env);                                  \
+        env->fp_status.float_exception_flags &= ~float_flag_inexact;   \
+    }                                                                  \
+                                                                       \
+    putVSR(xT(opcode), &xt, env);                                      \
+    helper_float_check_status(env);                                    \
+}
+
+VSX_ROUND(xsrdpi, 1, float64, f64, float_round_nearest_even, 1)
+VSX_ROUND(xsrdpic, 1, float64, f64, FLOAT_ROUND_CURRENT, 1)
+VSX_ROUND(xsrdpim, 1, float64, f64, float_round_down, 1)
+VSX_ROUND(xsrdpip, 1, float64, f64, float_round_up, 1)
+VSX_ROUND(xsrdpiz, 1, float64, f64, float_round_to_zero, 1)
+
+VSX_ROUND(xvrdpi, 2, float64, f64, float_round_nearest_even, 0)
+VSX_ROUND(xvrdpic, 2, float64, f64, FLOAT_ROUND_CURRENT, 0)
+VSX_ROUND(xvrdpim, 2, float64, f64, float_round_down, 0)
+VSX_ROUND(xvrdpip, 2, float64, f64, float_round_up, 0)
+VSX_ROUND(xvrdpiz, 2, float64, f64, float_round_to_zero, 0)
+
+VSX_ROUND(xvrspi, 4, float32, f32, float_round_nearest_even, 0)
+VSX_ROUND(xvrspic, 4, float32, f32, FLOAT_ROUND_CURRENT, 0)
+VSX_ROUND(xvrspim, 4, float32, f32, float_round_down, 0)
+VSX_ROUND(xvrspip, 4, float32, f32, float_round_up, 0)
+VSX_ROUND(xvrspiz, 4, float32, f32, float_round_to_zero, 0)
+
+uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb)
+{
+    uint64_t xt;
+
+    helper_reset_fpstatus(env);
+    xt = helper_frsp(env, xb);
+
+    helper_compute_fprf(env, xt, 1);
+    helper_float_check_status(env);
+    return xt;
+}
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 6d282bb32d..99f10deee1 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -31,7 +31,11 @@ DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
 DEF_HELPER_3(mulldo, i64, env, i64, i64)
+DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
+DEF_HELPER_4(divde, i64, env, i64, i64, i32)
 #endif
+DEF_HELPER_4(divweu, tl, env, tl, tl, i32)
+DEF_HELPER_4(divwe, tl, env, tl, tl, i32)
 
 DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
@@ -41,6 +45,7 @@ DEF_HELPER_3(sraw, tl, env, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(srad, tl, env, tl, tl)
 #endif
 
@@ -61,11 +66,18 @@ DEF_HELPER_4(fcmpo, void, env, i64, i64, i32)
 DEF_HELPER_4(fcmpu, void, env, i64, i64, i32)
 
 DEF_HELPER_2(fctiw, i64, env, i64)
+DEF_HELPER_2(fctiwu, i64, env, i64)
 DEF_HELPER_2(fctiwz, i64, env, i64)
+DEF_HELPER_2(fctiwuz, i64, env, i64)
 #if defined(TARGET_PPC64)
 DEF_HELPER_2(fcfid, i64, env, i64)
+DEF_HELPER_2(fcfidu, i64, env, i64)
+DEF_HELPER_2(fcfids, i64, env, i64)
+DEF_HELPER_2(fcfidus, i64, env, i64)
 DEF_HELPER_2(fctid, i64, env, i64)
+DEF_HELPER_2(fctidu, i64, env, i64)
 DEF_HELPER_2(fctidz, i64, env, i64)
+DEF_HELPER_2(fctiduz, i64, env, i64)
 #endif
 DEF_HELPER_2(frsp, i64, env, i64)
 DEF_HELPER_2(frin, i64, env, i64)
@@ -87,6 +99,9 @@ DEF_HELPER_2(fres, i64, env, i64)
 DEF_HELPER_2(frsqrte, i64, env, i64)
 DEF_HELPER_4(fsel, i64, env, i64, i64, i64)
 
+DEF_HELPER_FLAGS_2(ftdiv, TCG_CALL_NO_RWG_SE, i32, i64, i64)
+DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64)
+
 #define dh_alias_avr ptr
 #define dh_ctype_avr ppc_avr_t *
 #define dh_is_signed_avr dh_is_signed_ptr
@@ -94,9 +109,11 @@ DEF_HELPER_4(fsel, i64, env, i64, i64, i64)
 DEF_HELPER_3(vaddubm, void, avr, avr, avr)
 DEF_HELPER_3(vadduhm, void, avr, avr, avr)
 DEF_HELPER_3(vadduwm, void, avr, avr, avr)
+DEF_HELPER_3(vaddudm, void, avr, avr, avr)
 DEF_HELPER_3(vsububm, void, avr, avr, avr)
 DEF_HELPER_3(vsubuhm, void, avr, avr, avr)
 DEF_HELPER_3(vsubuwm, void, avr, avr, avr)
+DEF_HELPER_3(vsubudm, void, avr, avr, avr)
 DEF_HELPER_3(vavgub, void, avr, avr, avr)
 DEF_HELPER_3(vavguh, void, avr, avr, avr)
 DEF_HELPER_3(vavguw, void, avr, avr, avr)
@@ -106,24 +123,31 @@ DEF_HELPER_3(vavgsw, void, avr, avr, avr)
 DEF_HELPER_3(vminsb, void, avr, avr, avr)
 DEF_HELPER_3(vminsh, void, avr, avr, avr)
 DEF_HELPER_3(vminsw, void, avr, avr, avr)
+DEF_HELPER_3(vminsd, void, avr, avr, avr)
 DEF_HELPER_3(vmaxsb, void, avr, avr, avr)
 DEF_HELPER_3(vmaxsh, void, avr, avr, avr)
 DEF_HELPER_3(vmaxsw, void, avr, avr, avr)
+DEF_HELPER_3(vmaxsd, void, avr, avr, avr)
 DEF_HELPER_3(vminub, void, avr, avr, avr)
 DEF_HELPER_3(vminuh, void, avr, avr, avr)
 DEF_HELPER_3(vminuw, void, avr, avr, avr)
+DEF_HELPER_3(vminud, void, avr, avr, avr)
 DEF_HELPER_3(vmaxub, void, avr, avr, avr)
 DEF_HELPER_3(vmaxuh, void, avr, avr, avr)
 DEF_HELPER_3(vmaxuw, void, avr, avr, avr)
+DEF_HELPER_3(vmaxud, void, avr, avr, avr)
 DEF_HELPER_4(vcmpequb, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpequh, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpequw, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpequd, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtub, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtuh, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtuw, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpgtud, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtsb, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtsh, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtsw, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpgtsd, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpeqfp, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgefp, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtfp, void, env, avr, avr, avr)
@@ -131,12 +155,15 @@ DEF_HELPER_4(vcmpbfp, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpequb_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpequh_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpequw_dot, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpequd_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtub_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtuh_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtuw_dot, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpgtud_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtsb_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtsh_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtsw_dot, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpgtsd_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpeqfp_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgefp_dot, void, env, avr, avr, avr)
 DEF_HELPER_4(vcmpgtfp_dot, void, env, avr, avr, avr)
@@ -149,21 +176,29 @@ DEF_HELPER_3(vmrghh, void, avr, avr, avr)
 DEF_HELPER_3(vmrghw, void, avr, avr, avr)
 DEF_HELPER_3(vmulesb, void, avr, avr, avr)
 DEF_HELPER_3(vmulesh, void, avr, avr, avr)
+DEF_HELPER_3(vmulesw, void, avr, avr, avr)
 DEF_HELPER_3(vmuleub, void, avr, avr, avr)
 DEF_HELPER_3(vmuleuh, void, avr, avr, avr)
+DEF_HELPER_3(vmuleuw, void, avr, avr, avr)
 DEF_HELPER_3(vmulosb, void, avr, avr, avr)
 DEF_HELPER_3(vmulosh, void, avr, avr, avr)
+DEF_HELPER_3(vmulosw, void, avr, avr, avr)
 DEF_HELPER_3(vmuloub, void, avr, avr, avr)
 DEF_HELPER_3(vmulouh, void, avr, avr, avr)
+DEF_HELPER_3(vmulouw, void, avr, avr, avr)
+DEF_HELPER_3(vmuluwm, void, avr, avr, avr)
 DEF_HELPER_3(vsrab, void, avr, avr, avr)
 DEF_HELPER_3(vsrah, void, avr, avr, avr)
 DEF_HELPER_3(vsraw, void, avr, avr, avr)
+DEF_HELPER_3(vsrad, void, avr, avr, avr)
 DEF_HELPER_3(vsrb, void, avr, avr, avr)
 DEF_HELPER_3(vsrh, void, avr, avr, avr)
 DEF_HELPER_3(vsrw, void, avr, avr, avr)
+DEF_HELPER_3(vsrd, void, avr, avr, avr)
 DEF_HELPER_3(vslb, void, avr, avr, avr)
 DEF_HELPER_3(vslh, void, avr, avr, avr)
 DEF_HELPER_3(vslw, void, avr, avr, avr)
+DEF_HELPER_3(vsld, void, avr, avr, avr)
 DEF_HELPER_3(vslo, void, avr, avr, avr)
 DEF_HELPER_3(vsro, void, avr, avr, avr)
 DEF_HELPER_3(vaddcuw, void, avr, avr, avr)
@@ -182,9 +217,18 @@ DEF_HELPER_4(vadduws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsububs, void, env, avr, avr, avr)
 DEF_HELPER_4(vsubuhs, void, env, avr, avr, avr)
 DEF_HELPER_4(vsubuws, void, env, avr, avr, avr)
+DEF_HELPER_3(vadduqm, void, avr, avr, avr)
+DEF_HELPER_4(vaddecuq, void, avr, avr, avr, avr)
+DEF_HELPER_4(vaddeuqm, void, avr, avr, avr, avr)
+DEF_HELPER_3(vaddcuq, void, avr, avr, avr)
+DEF_HELPER_3(vsubuqm, void, avr, avr, avr)
+DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr)
+DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr)
+DEF_HELPER_3(vsubcuq, void, avr, avr, avr)
 DEF_HELPER_3(vrlb, void, avr, avr, avr)
 DEF_HELPER_3(vrlh, void, avr, avr, avr)
 DEF_HELPER_3(vrlw, void, avr, avr, avr)
+DEF_HELPER_3(vrld, void, avr, avr, avr)
 DEF_HELPER_3(vsl, void, avr, avr, avr)
 DEF_HELPER_3(vsr, void, avr, avr, avr)
 DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32)
@@ -198,8 +242,10 @@ DEF_HELPER_2(vupkhpx, void, avr, avr)
 DEF_HELPER_2(vupklpx, void, avr, avr)
 DEF_HELPER_2(vupkhsb, void, avr, avr)
 DEF_HELPER_2(vupkhsh, void, avr, avr)
+DEF_HELPER_2(vupkhsw, void, avr, avr)
 DEF_HELPER_2(vupklsb, void, avr, avr)
 DEF_HELPER_2(vupklsh, void, avr, avr)
+DEF_HELPER_2(vupklsw, void, avr, avr)
 DEF_HELPER_5(vmsumubm, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmsummbm, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vsel, void, env, avr, avr, avr, avr)
@@ -208,10 +254,14 @@ DEF_HELPER_4(vpkshss, void, env, avr, avr, avr)
 DEF_HELPER_4(vpkshus, void, env, avr, avr, avr)
 DEF_HELPER_4(vpkswss, void, env, avr, avr, avr)
 DEF_HELPER_4(vpkswus, void, env, avr, avr, avr)
+DEF_HELPER_4(vpksdss, void, env, avr, avr, avr)
+DEF_HELPER_4(vpksdus, void, env, avr, avr, avr)
 DEF_HELPER_4(vpkuhus, void, env, avr, avr, avr)
 DEF_HELPER_4(vpkuwus, void, env, avr, avr, avr)
+DEF_HELPER_4(vpkudus, void, env, avr, avr, avr)
 DEF_HELPER_4(vpkuhum, void, env, avr, avr, avr)
 DEF_HELPER_4(vpkuwum, void, env, avr, avr, avr)
+DEF_HELPER_4(vpkudum, void, env, avr, avr, avr)
 DEF_HELPER_3(vpkpx, void, avr, avr, avr)
 DEF_HELPER_5(vmhaddshs, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmhraddshs, void, env, avr, avr, avr, avr)
@@ -251,6 +301,163 @@ DEF_HELPER_4(vcfsx, void, env, avr, avr, i32)
 DEF_HELPER_4(vctuxs, void, env, avr, avr, i32)
 DEF_HELPER_4(vctsxs, void, env, avr, avr, i32)
 
+DEF_HELPER_2(vclzb, void, avr, avr)
+DEF_HELPER_2(vclzh, void, avr, avr)
+DEF_HELPER_2(vclzw, void, avr, avr)
+DEF_HELPER_2(vclzd, void, avr, avr)
+DEF_HELPER_2(vpopcntb, void, avr, avr)
+DEF_HELPER_2(vpopcnth, void, avr, avr)
+DEF_HELPER_2(vpopcntw, void, avr, avr)
+DEF_HELPER_2(vpopcntd, void, avr, avr)
+DEF_HELPER_3(vbpermq, void, avr, avr, avr)
+DEF_HELPER_2(vgbbd, void, avr, avr)
+DEF_HELPER_3(vpmsumb, void, avr, avr, avr)
+DEF_HELPER_3(vpmsumh, void, avr, avr, avr)
+DEF_HELPER_3(vpmsumw, void, avr, avr, avr)
+DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
+
+DEF_HELPER_2(vsbox, void, avr, avr)
+DEF_HELPER_3(vcipher, void, avr, avr, avr)
+DEF_HELPER_3(vcipherlast, void, avr, avr, avr)
+DEF_HELPER_3(vncipher, void, avr, avr, avr)
+DEF_HELPER_3(vncipherlast, void, avr, avr, avr)
+DEF_HELPER_3(vshasigmaw, void, avr, avr, i32)
+DEF_HELPER_3(vshasigmad, void, avr, avr, i32)
+DEF_HELPER_4(vpermxor, void, avr, avr, avr, avr)
+
+DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32)
+DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32)
+
+DEF_HELPER_2(xsadddp, void, env, i32)
+DEF_HELPER_2(xssubdp, void, env, i32)
+DEF_HELPER_2(xsmuldp, void, env, i32)
+DEF_HELPER_2(xsdivdp, void, env, i32)
+DEF_HELPER_2(xsredp, void, env, i32)
+DEF_HELPER_2(xssqrtdp, void, env, i32)
+DEF_HELPER_2(xsrsqrtedp, void, env, i32)
+DEF_HELPER_2(xstdivdp, void, env, i32)
+DEF_HELPER_2(xstsqrtdp, void, env, i32)
+DEF_HELPER_2(xsmaddadp, void, env, i32)
+DEF_HELPER_2(xsmaddmdp, void, env, i32)
+DEF_HELPER_2(xsmsubadp, void, env, i32)
+DEF_HELPER_2(xsmsubmdp, void, env, i32)
+DEF_HELPER_2(xsnmaddadp, void, env, i32)
+DEF_HELPER_2(xsnmaddmdp, void, env, i32)
+DEF_HELPER_2(xsnmsubadp, void, env, i32)
+DEF_HELPER_2(xsnmsubmdp, void, env, i32)
+DEF_HELPER_2(xscmpodp, void, env, i32)
+DEF_HELPER_2(xscmpudp, void, env, i32)
+DEF_HELPER_2(xsmaxdp, void, env, i32)
+DEF_HELPER_2(xsmindp, void, env, i32)
+DEF_HELPER_2(xscvdpsp, void, env, i32)
+DEF_HELPER_2(xscvdpspn, i64, env, i64)
+DEF_HELPER_2(xscvspdp, void, env, i32)
+DEF_HELPER_2(xscvspdpn, i64, env, i64)
+DEF_HELPER_2(xscvdpsxds, void, env, i32)
+DEF_HELPER_2(xscvdpsxws, void, env, i32)
+DEF_HELPER_2(xscvdpuxds, void, env, i32)
+DEF_HELPER_2(xscvdpuxws, void, env, i32)
+DEF_HELPER_2(xscvsxddp, void, env, i32)
+DEF_HELPER_2(xscvuxdsp, void, env, i32)
+DEF_HELPER_2(xscvsxdsp, void, env, i32)
+DEF_HELPER_2(xscvuxddp, void, env, i32)
+DEF_HELPER_2(xsrdpi, void, env, i32)
+DEF_HELPER_2(xsrdpic, void, env, i32)
+DEF_HELPER_2(xsrdpim, void, env, i32)
+DEF_HELPER_2(xsrdpip, void, env, i32)
+DEF_HELPER_2(xsrdpiz, void, env, i32)
+
+DEF_HELPER_2(xsaddsp, void, env, i32)
+DEF_HELPER_2(xssubsp, void, env, i32)
+DEF_HELPER_2(xsmulsp, void, env, i32)
+DEF_HELPER_2(xsdivsp, void, env, i32)
+DEF_HELPER_2(xsresp, void, env, i32)
+DEF_HELPER_2(xsrsp, i64, env, i64)
+DEF_HELPER_2(xssqrtsp, void, env, i32)
+DEF_HELPER_2(xsrsqrtesp, void, env, i32)
+DEF_HELPER_2(xsmaddasp, void, env, i32)
+DEF_HELPER_2(xsmaddmsp, void, env, i32)
+DEF_HELPER_2(xsmsubasp, void, env, i32)
+DEF_HELPER_2(xsmsubmsp, void, env, i32)
+DEF_HELPER_2(xsnmaddasp, void, env, i32)
+DEF_HELPER_2(xsnmaddmsp, void, env, i32)
+DEF_HELPER_2(xsnmsubasp, void, env, i32)
+DEF_HELPER_2(xsnmsubmsp, void, env, i32)
+
+DEF_HELPER_2(xvadddp, void, env, i32)
+DEF_HELPER_2(xvsubdp, void, env, i32)
+DEF_HELPER_2(xvmuldp, void, env, i32)
+DEF_HELPER_2(xvdivdp, void, env, i32)
+DEF_HELPER_2(xvredp, void, env, i32)
+DEF_HELPER_2(xvsqrtdp, void, env, i32)
+DEF_HELPER_2(xvrsqrtedp, void, env, i32)
+DEF_HELPER_2(xvtdivdp, void, env, i32)
+DEF_HELPER_2(xvtsqrtdp, void, env, i32)
+DEF_HELPER_2(xvmaddadp, void, env, i32)
+DEF_HELPER_2(xvmaddmdp, void, env, i32)
+DEF_HELPER_2(xvmsubadp, void, env, i32)
+DEF_HELPER_2(xvmsubmdp, void, env, i32)
+DEF_HELPER_2(xvnmaddadp, void, env, i32)
+DEF_HELPER_2(xvnmaddmdp, void, env, i32)
+DEF_HELPER_2(xvnmsubadp, void, env, i32)
+DEF_HELPER_2(xvnmsubmdp, void, env, i32)
+DEF_HELPER_2(xvmaxdp, void, env, i32)
+DEF_HELPER_2(xvmindp, void, env, i32)
+DEF_HELPER_2(xvcmpeqdp, void, env, i32)
+DEF_HELPER_2(xvcmpgedp, void, env, i32)
+DEF_HELPER_2(xvcmpgtdp, void, env, i32)
+DEF_HELPER_2(xvcvdpsp, void, env, i32)
+DEF_HELPER_2(xvcvdpsxds, void, env, i32)
+DEF_HELPER_2(xvcvdpsxws, void, env, i32)
+DEF_HELPER_2(xvcvdpuxds, void, env, i32)
+DEF_HELPER_2(xvcvdpuxws, void, env, i32)
+DEF_HELPER_2(xvcvsxddp, void, env, i32)
+DEF_HELPER_2(xvcvuxddp, void, env, i32)
+DEF_HELPER_2(xvcvsxwdp, void, env, i32)
+DEF_HELPER_2(xvcvuxwdp, void, env, i32)
+DEF_HELPER_2(xvrdpi, void, env, i32)
+DEF_HELPER_2(xvrdpic, void, env, i32)
+DEF_HELPER_2(xvrdpim, void, env, i32)
+DEF_HELPER_2(xvrdpip, void, env, i32)
+DEF_HELPER_2(xvrdpiz, void, env, i32)
+
+DEF_HELPER_2(xvaddsp, void, env, i32)
+DEF_HELPER_2(xvsubsp, void, env, i32)
+DEF_HELPER_2(xvmulsp, void, env, i32)
+DEF_HELPER_2(xvdivsp, void, env, i32)
+DEF_HELPER_2(xvresp, void, env, i32)
+DEF_HELPER_2(xvsqrtsp, void, env, i32)
+DEF_HELPER_2(xvrsqrtesp, void, env, i32)
+DEF_HELPER_2(xvtdivsp, void, env, i32)
+DEF_HELPER_2(xvtsqrtsp, void, env, i32)
+DEF_HELPER_2(xvmaddasp, void, env, i32)
+DEF_HELPER_2(xvmaddmsp, void, env, i32)
+DEF_HELPER_2(xvmsubasp, void, env, i32)
+DEF_HELPER_2(xvmsubmsp, void, env, i32)
+DEF_HELPER_2(xvnmaddasp, void, env, i32)
+DEF_HELPER_2(xvnmaddmsp, void, env, i32)
+DEF_HELPER_2(xvnmsubasp, void, env, i32)
+DEF_HELPER_2(xvnmsubmsp, void, env, i32)
+DEF_HELPER_2(xvmaxsp, void, env, i32)
+DEF_HELPER_2(xvminsp, void, env, i32)
+DEF_HELPER_2(xvcmpeqsp, void, env, i32)
+DEF_HELPER_2(xvcmpgesp, void, env, i32)
+DEF_HELPER_2(xvcmpgtsp, void, env, i32)
+DEF_HELPER_2(xvcvspdp, void, env, i32)
+DEF_HELPER_2(xvcvspsxds, void, env, i32)
+DEF_HELPER_2(xvcvspsxws, void, env, i32)
+DEF_HELPER_2(xvcvspuxds, void, env, i32)
+DEF_HELPER_2(xvcvspuxws, void, env, i32)
+DEF_HELPER_2(xvcvsxdsp, void, env, i32)
+DEF_HELPER_2(xvcvuxdsp, void, env, i32)
+DEF_HELPER_2(xvcvsxwsp, void, env, i32)
+DEF_HELPER_2(xvcvuxwsp, void, env, i32)
+DEF_HELPER_2(xvrspi, void, env, i32)
+DEF_HELPER_2(xvrspic, void, env, i32)
+DEF_HELPER_2(xvrspim, void, env, i32)
+DEF_HELPER_2(xvrspip, void, env, i32)
+DEF_HELPER_2(xvrspiz, void, env, i32)
+
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
 DEF_HELPER_2(efscfuf, i32, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index e50bdd20ec..63dde94b04 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -41,6 +41,119 @@ uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
 }
 #endif
 
+target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
+                           uint32_t oe)
+{
+    uint64_t rt = 0;
+    int overflow = 0;
+
+    uint64_t dividend = (uint64_t)ra << 32;
+    uint64_t divisor = (uint32_t)rb;
+
+    if (unlikely(divisor == 0)) {
+        overflow = 1;
+    } else {
+        rt = dividend / divisor;
+        overflow = rt > UINT32_MAX;
+    }
+
+    if (unlikely(overflow)) {
+        rt = 0; /* Undefined */
+    }
+
+    if (oe) {
+        if (unlikely(overflow)) {
+            env->so = env->ov = 1;
+        } else {
+            env->ov = 0;
+        }
+    }
+
+    return (target_ulong)rt;
+}
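+
+/* Example: divweu computes (RA << 32) / RB as an unsigned 64/32 divide;
+ * ra = 1, rb = 16 gives 0x10000000.  Quotients that do not fit in
+ * 32 bits (or a zero divisor) leave RT undefined and, with OE set,
+ * raise XER[OV] and XER[SO].
+ */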
+
+target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
+                          uint32_t oe)
+{
+    int64_t rt = 0;
+    int overflow = 0;
+
+    int64_t dividend = (int64_t)ra << 32;
+    int64_t divisor = (int64_t)((int32_t)rb);
+
+    if (unlikely((divisor == 0) ||
+                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
+        overflow = 1;
+    } else {
+        rt = dividend / divisor;
+        overflow = rt != (int32_t)rt;
+    }
+
+    if (unlikely(overflow)) {
+        rt = 0; /* Undefined */
+    }
+
+    if (oe) {
+        if (unlikely(overflow)) {
+            env->so = env->ov = 1;
+        } else {
+            env->ov = 0;
+        }
+    }
+
+    return (target_ulong)rt;
+}
+
+#if defined(TARGET_PPC64)
+
+uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
+{
+    uint64_t rt = 0;
+    int overflow = 0;
+
+    overflow = divu128(&rt, &ra, rb);
+
+    if (unlikely(overflow)) {
+        rt = 0; /* Undefined */
+    }
+
+    if (oe) {
+        if (unlikely(overflow)) {
+            env->so = env->ov = 1;
+        } else {
+            env->ov = 0;
+        }
+    }
+
+    return rt;
+}
+
+uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
+{
+    int64_t rt = 0;
+    int64_t ra = (int64_t)rau;
+    int64_t rb = (int64_t)rbu;
+    int overflow = divs128(&rt, &ra, rb);
+
+    if (unlikely(overflow)) {
+        rt = 0; /* Undefined */
+    }
+
+    if (oe) {
+        if (unlikely(overflow)) {
+            env->so = env->ov = 1;
+        } else {
+            env->ov = 0;
+        }
+    }
+
+    return rt;
+}
+
+#endif
+
 target_ulong helper_cntlzw(target_ulong t)
 {
     return clz32(t);
@@ -53,6 +166,26 @@ target_ulong helper_cntlzd(target_ulong t)
 }
 #endif
 
+#if defined(TARGET_PPC64)
+
+uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
+{
+    int i;
+    uint64_t ra = 0;
+
+    for (i = 0; i < 8; i++) {
+        int index = (rs >> (i*8)) & 0xFF;
+        if (index < 64) {
+            if (rb & (1ull << (63-index))) {
+                ra |= 1 << i;
+            }
+        }
+    }
+    return ra;
+}
+
+#endif
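+
+/* Example: each byte of RS selects one bit of RB (IBM bit numbering,
+ * 0 = MSB), and bit i of the result reflects the selected bit; so
+ * rs = 0x0706050403020100 with rb = 0x8000000000000000 yields ra = 1.
+ */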
+
 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
 {
     target_ulong mask = 0xff;
@@ -371,6 +504,8 @@ void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 VARITH(ubm, u8)
 VARITH(uhm, u16)
 VARITH(uwm, u32)
+VARITH(udm, u64)
+VARITH_DO(muluwm, *, u32)
 #undef VARITH_DO
 #undef VARITH
 
@@ -491,15 +626,18 @@ VCF(sx, int32_to_float32, s32)
     void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                              ppc_avr_t *a, ppc_avr_t *b)                \
     {                                                                   \
-        uint32_t ones = (uint32_t)-1;                                   \
-        uint32_t all = ones;                                            \
-        uint32_t none = 0;                                              \
+        uint64_t ones = (uint64_t)-1;                                   \
+        uint64_t all = ones;                                            \
+        uint64_t none = 0;                                              \
         int i;                                                          \
                                                                         \
         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            uint32_t result = (a->element[i] compare b->element[i] ?    \
+            uint64_t result = (a->element[i] compare b->element[i] ?    \
                                ones : 0x0);                             \
             switch (sizeof(a->element[0])) {                            \
+            case 8:                                                     \
+                r->u64[i] = result;                                     \
+                break;                                                  \
             case 4:                                                     \
                 r->u32[i] = result;                                     \
                 break;                                                  \
@@ -523,12 +661,15 @@ VCF(sx, int32_to_float32, s32)
 VCMP(equb, ==, u8)
 VCMP(equh, ==, u16)
 VCMP(equw, ==, u32)
+VCMP(equd, ==, u64)
 VCMP(gtub, >, u8)
 VCMP(gtuh, >, u16)
 VCMP(gtuw, >, u32)
+VCMP(gtud, >, u64)
 VCMP(gtsb, >, s8)
 VCMP(gtsh, >, s16)
 VCMP(gtsw, >, s32)
+VCMP(gtsd, >, s64)
 #undef VCMP_DO
 #undef VCMP
 
@@ -689,9 +830,11 @@ void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 VMINMAX(sb, s8)
 VMINMAX(sh, s16)
 VMINMAX(sw, s32)
+VMINMAX(sd, s64)
 VMINMAX(ub, u8)
 VMINMAX(uh, u16)
 VMINMAX(uw, u32)
+VMINMAX(ud, u64)
 #undef VMINMAX_DO
 #undef VMINMAX
 
@@ -849,28 +992,32 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
     }
 }
 
-#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
+#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
     {                                                                   \
         int i;                                                          \
                                                                         \
         VECTOR_FOR_INORDER_I(i, prod_element) {                         \
             if (evenp) {                                                \
-                r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] *   \
-                    b->mul_element[i * 2 + HI_IDX];                     \
+                r->prod_element[i] =                                    \
+                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
+                    (cast)b->mul_element[i * 2 + HI_IDX];               \
             } else {                                                    \
-                r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] *   \
-                    b->mul_element[i * 2 + LO_IDX];                     \
+                r->prod_element[i] =                                    \
+                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
+                    (cast)b->mul_element[i * 2 + LO_IDX];               \
             }                                                           \
         }                                                               \
     }
-#define VMUL(suffix, mul_element, prod_element)         \
-    VMUL_DO(mule##suffix, mul_element, prod_element, 1) \
-    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
-VMUL(sb, s8, s16)
-VMUL(sh, s16, s32)
-VMUL(ub, u8, u16)
-VMUL(uh, u16, u32)
+#define VMUL(suffix, mul_element, prod_element, cast)            \
+    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
+    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
+VMUL(sb, s8, s16, int16_t)
+VMUL(sh, s16, s32, int32_t)
+VMUL(sw, s32, s64, int64_t)
+VMUL(ub, u8, u16, uint16_t)
+VMUL(uh, u16, u32, uint32_t)
+VMUL(uw, u32, u64, uint64_t)
 #undef VMUL_DO
 #undef VMUL
 
@@ -898,6 +1045,383 @@ void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
 }
 
 #if defined(HOST_WORDS_BIGENDIAN)
+#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
+#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
+#else
+#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
+#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
+#endif
+
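+/*
+ * vbpermq: each of the 16 bytes of b selects a bit position (0-127,
+ * numbered from the most significant bit) in the 128-bit value a; the
+ * gathered bits land in the low-order 16 bits of the high doubleword
+ * of the result, selector byte 0 first.
+ */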
+void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+    uint64_t perm = 0;
+
+    VECTOR_FOR_INORDER_I(i, u8) {
+        int index = VBPERMQ_INDEX(b, i);
+
+        if (index < 128) {
+            uint64_t mask = (1ull << (63-(index & 0x3F)));
+            if (a->u64[VBPERMQ_DW(index)] & mask) {
+                perm |= (0x8000 >> i);
+            }
+        }
+    }
+
+    r->u64[HI_IDX] = perm;
+    r->u64[LO_IDX] = 0;
+}
+
+#undef VBPERMQ_INDEX
+#undef VBPERMQ_DW
+
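+/*
+ * Lookup table for vgbbd: entry v spreads the 8 bits of v so that bit k
+ * of v (k = 0 for the least significant bit) becomes the most
+ * significant bit of byte k of the result, e.g. 0x03 -> 0x...8080ull.
+ */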
+static const uint64_t VGBBD_MASKS[256] = {
+    0x0000000000000000ull, /* 00 */
+    0x0000000000000080ull, /* 01 */
+    0x0000000000008000ull, /* 02 */
+    0x0000000000008080ull, /* 03 */
+    0x0000000000800000ull, /* 04 */
+    0x0000000000800080ull, /* 05 */
+    0x0000000000808000ull, /* 06 */
+    0x0000000000808080ull, /* 07 */
+    0x0000000080000000ull, /* 08 */
+    0x0000000080000080ull, /* 09 */
+    0x0000000080008000ull, /* 0A */
+    0x0000000080008080ull, /* 0B */
+    0x0000000080800000ull, /* 0C */
+    0x0000000080800080ull, /* 0D */
+    0x0000000080808000ull, /* 0E */
+    0x0000000080808080ull, /* 0F */
+    0x0000008000000000ull, /* 10 */
+    0x0000008000000080ull, /* 11 */
+    0x0000008000008000ull, /* 12 */
+    0x0000008000008080ull, /* 13 */
+    0x0000008000800000ull, /* 14 */
+    0x0000008000800080ull, /* 15 */
+    0x0000008000808000ull, /* 16 */
+    0x0000008000808080ull, /* 17 */
+    0x0000008080000000ull, /* 18 */
+    0x0000008080000080ull, /* 19 */
+    0x0000008080008000ull, /* 1A */
+    0x0000008080008080ull, /* 1B */
+    0x0000008080800000ull, /* 1C */
+    0x0000008080800080ull, /* 1D */
+    0x0000008080808000ull, /* 1E */
+    0x0000008080808080ull, /* 1F */
+    0x0000800000000000ull, /* 20 */
+    0x0000800000000080ull, /* 21 */
+    0x0000800000008000ull, /* 22 */
+    0x0000800000008080ull, /* 23 */
+    0x0000800000800000ull, /* 24 */
+    0x0000800000800080ull, /* 25 */
+    0x0000800000808000ull, /* 26 */
+    0x0000800000808080ull, /* 27 */
+    0x0000800080000000ull, /* 28 */
+    0x0000800080000080ull, /* 29 */
+    0x0000800080008000ull, /* 2A */
+    0x0000800080008080ull, /* 2B */
+    0x0000800080800000ull, /* 2C */
+    0x0000800080800080ull, /* 2D */
+    0x0000800080808000ull, /* 2E */
+    0x0000800080808080ull, /* 2F */
+    0x0000808000000000ull, /* 30 */
+    0x0000808000000080ull, /* 31 */
+    0x0000808000008000ull, /* 32 */
+    0x0000808000008080ull, /* 33 */
+    0x0000808000800000ull, /* 34 */
+    0x0000808000800080ull, /* 35 */
+    0x0000808000808000ull, /* 36 */
+    0x0000808000808080ull, /* 37 */
+    0x0000808080000000ull, /* 38 */
+    0x0000808080000080ull, /* 39 */
+    0x0000808080008000ull, /* 3A */
+    0x0000808080008080ull, /* 3B */
+    0x0000808080800000ull, /* 3C */
+    0x0000808080800080ull, /* 3D */
+    0x0000808080808000ull, /* 3E */
+    0x0000808080808080ull, /* 3F */
+    0x0080000000000000ull, /* 40 */
+    0x0080000000000080ull, /* 41 */
+    0x0080000000008000ull, /* 42 */
+    0x0080000000008080ull, /* 43 */
+    0x0080000000800000ull, /* 44 */
+    0x0080000000800080ull, /* 45 */
+    0x0080000000808000ull, /* 46 */
+    0x0080000000808080ull, /* 47 */
+    0x0080000080000000ull, /* 48 */
+    0x0080000080000080ull, /* 49 */
+    0x0080000080008000ull, /* 4A */
+    0x0080000080008080ull, /* 4B */
+    0x0080000080800000ull, /* 4C */
+    0x0080000080800080ull, /* 4D */
+    0x0080000080808000ull, /* 4E */
+    0x0080000080808080ull, /* 4F */
+    0x0080008000000000ull, /* 50 */
+    0x0080008000000080ull, /* 51 */
+    0x0080008000008000ull, /* 52 */
+    0x0080008000008080ull, /* 53 */
+    0x0080008000800000ull, /* 54 */
+    0x0080008000800080ull, /* 55 */
+    0x0080008000808000ull, /* 56 */
+    0x0080008000808080ull, /* 57 */
+    0x0080008080000000ull, /* 58 */
+    0x0080008080000080ull, /* 59 */
+    0x0080008080008000ull, /* 5A */
+    0x0080008080008080ull, /* 5B */
+    0x0080008080800000ull, /* 5C */
+    0x0080008080800080ull, /* 5D */
+    0x0080008080808000ull, /* 5E */
+    0x0080008080808080ull, /* 5F */
+    0x0080800000000000ull, /* 60 */
+    0x0080800000000080ull, /* 61 */
+    0x0080800000008000ull, /* 62 */
+    0x0080800000008080ull, /* 63 */
+    0x0080800000800000ull, /* 64 */
+    0x0080800000800080ull, /* 65 */
+    0x0080800000808000ull, /* 66 */
+    0x0080800000808080ull, /* 67 */
+    0x0080800080000000ull, /* 68 */
+    0x0080800080000080ull, /* 69 */
+    0x0080800080008000ull, /* 6A */
+    0x0080800080008080ull, /* 6B */
+    0x0080800080800000ull, /* 6C */
+    0x0080800080800080ull, /* 6D */
+    0x0080800080808000ull, /* 6E */
+    0x0080800080808080ull, /* 6F */
+    0x0080808000000000ull, /* 70 */
+    0x0080808000000080ull, /* 71 */
+    0x0080808000008000ull, /* 72 */
+    0x0080808000008080ull, /* 73 */
+    0x0080808000800000ull, /* 74 */
+    0x0080808000800080ull, /* 75 */
+    0x0080808000808000ull, /* 76 */
+    0x0080808000808080ull, /* 77 */
+    0x0080808080000000ull, /* 78 */
+    0x0080808080000080ull, /* 79 */
+    0x0080808080008000ull, /* 7A */
+    0x0080808080008080ull, /* 7B */
+    0x0080808080800000ull, /* 7C */
+    0x0080808080800080ull, /* 7D */
+    0x0080808080808000ull, /* 7E */
+    0x0080808080808080ull, /* 7F */
+    0x8000000000000000ull, /* 80 */
+    0x8000000000000080ull, /* 81 */
+    0x8000000000008000ull, /* 82 */
+    0x8000000000008080ull, /* 83 */
+    0x8000000000800000ull, /* 84 */
+    0x8000000000800080ull, /* 85 */
+    0x8000000000808000ull, /* 86 */
+    0x8000000000808080ull, /* 87 */
+    0x8000000080000000ull, /* 88 */
+    0x8000000080000080ull, /* 89 */
+    0x8000000080008000ull, /* 8A */
+    0x8000000080008080ull, /* 8B */
+    0x8000000080800000ull, /* 8C */
+    0x8000000080800080ull, /* 8D */
+    0x8000000080808000ull, /* 8E */
+    0x8000000080808080ull, /* 8F */
+    0x8000008000000000ull, /* 90 */
+    0x8000008000000080ull, /* 91 */
+    0x8000008000008000ull, /* 92 */
+    0x8000008000008080ull, /* 93 */
+    0x8000008000800000ull, /* 94 */
+    0x8000008000800080ull, /* 95 */
+    0x8000008000808000ull, /* 96 */
+    0x8000008000808080ull, /* 97 */
+    0x8000008080000000ull, /* 98 */
+    0x8000008080000080ull, /* 99 */
+    0x8000008080008000ull, /* 9A */
+    0x8000008080008080ull, /* 9B */
+    0x8000008080800000ull, /* 9C */
+    0x8000008080800080ull, /* 9D */
+    0x8000008080808000ull, /* 9E */
+    0x8000008080808080ull, /* 9F */
+    0x8000800000000000ull, /* A0 */
+    0x8000800000000080ull, /* A1 */
+    0x8000800000008000ull, /* A2 */
+    0x8000800000008080ull, /* A3 */
+    0x8000800000800000ull, /* A4 */
+    0x8000800000800080ull, /* A5 */
+    0x8000800000808000ull, /* A6 */
+    0x8000800000808080ull, /* A7 */
+    0x8000800080000000ull, /* A8 */
+    0x8000800080000080ull, /* A9 */
+    0x8000800080008000ull, /* AA */
+    0x8000800080008080ull, /* AB */
+    0x8000800080800000ull, /* AC */
+    0x8000800080800080ull, /* AD */
+    0x8000800080808000ull, /* AE */
+    0x8000800080808080ull, /* AF */
+    0x8000808000000000ull, /* B0 */
+    0x8000808000000080ull, /* B1 */
+    0x8000808000008000ull, /* B2 */
+    0x8000808000008080ull, /* B3 */
+    0x8000808000800000ull, /* B4 */
+    0x8000808000800080ull, /* B5 */
+    0x8000808000808000ull, /* B6 */
+    0x8000808000808080ull, /* B7 */
+    0x8000808080000000ull, /* B8 */
+    0x8000808080000080ull, /* B9 */
+    0x8000808080008000ull, /* BA */
+    0x8000808080008080ull, /* BB */
+    0x8000808080800000ull, /* BC */
+    0x8000808080800080ull, /* BD */
+    0x8000808080808000ull, /* BE */
+    0x8000808080808080ull, /* BF */
+    0x8080000000000000ull, /* C0 */
+    0x8080000000000080ull, /* C1 */
+    0x8080000000008000ull, /* C2 */
+    0x8080000000008080ull, /* C3 */
+    0x8080000000800000ull, /* C4 */
+    0x8080000000800080ull, /* C5 */
+    0x8080000000808000ull, /* C6 */
+    0x8080000000808080ull, /* C7 */
+    0x8080000080000000ull, /* C8 */
+    0x8080000080000080ull, /* C9 */
+    0x8080000080008000ull, /* CA */
+    0x8080000080008080ull, /* CB */
+    0x8080000080800000ull, /* CC */
+    0x8080000080800080ull, /* CD */
+    0x8080000080808000ull, /* CE */
+    0x8080000080808080ull, /* CF */
+    0x8080008000000000ull, /* D0 */
+    0x8080008000000080ull, /* D1 */
+    0x8080008000008000ull, /* D2 */
+    0x8080008000008080ull, /* D3 */
+    0x8080008000800000ull, /* D4 */
+    0x8080008000800080ull, /* D5 */
+    0x8080008000808000ull, /* D6 */
+    0x8080008000808080ull, /* D7 */
+    0x8080008080000000ull, /* D8 */
+    0x8080008080000080ull, /* D9 */
+    0x8080008080008000ull, /* DA */
+    0x8080008080008080ull, /* DB */
+    0x8080008080800000ull, /* DC */
+    0x8080008080800080ull, /* DD */
+    0x8080008080808000ull, /* DE */
+    0x8080008080808080ull, /* DF */
+    0x8080800000000000ull, /* E0 */
+    0x8080800000000080ull, /* E1 */
+    0x8080800000008000ull, /* E2 */
+    0x8080800000008080ull, /* E3 */
+    0x8080800000800000ull, /* E4 */
+    0x8080800000800080ull, /* E5 */
+    0x8080800000808000ull, /* E6 */
+    0x8080800000808080ull, /* E7 */
+    0x8080800080000000ull, /* E8 */
+    0x8080800080000080ull, /* E9 */
+    0x8080800080008000ull, /* EA */
+    0x8080800080008080ull, /* EB */
+    0x8080800080800000ull, /* EC */
+    0x8080800080800080ull, /* ED */
+    0x8080800080808000ull, /* EE */
+    0x8080800080808080ull, /* EF */
+    0x8080808000000000ull, /* F0 */
+    0x8080808000000080ull, /* F1 */
+    0x8080808000008000ull, /* F2 */
+    0x8080808000008080ull, /* F3 */
+    0x8080808000800000ull, /* F4 */
+    0x8080808000800080ull, /* F5 */
+    0x8080808000808000ull, /* F6 */
+    0x8080808000808080ull, /* F7 */
+    0x8080808080000000ull, /* F8 */
+    0x8080808080000080ull, /* F9 */
+    0x8080808080008000ull, /* FA */
+    0x8080808080008080ull, /* FB */
+    0x8080808080800000ull, /* FC */
+    0x8080808080800080ull, /* FD */
+    0x8080808080808000ull, /* FE */
+    0x8080808080808080ull, /* FF */
+};
+
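+/*
+ * vgbbd: within each doubleword of b, gather bit k of every byte into
+ * byte k of the result -- an 8x8 bit-matrix transpose per doubleword.
+ */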
+void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
+{
+    int i;
+    uint64_t t[2] = { 0, 0 };
+
+    VECTOR_FOR_INORDER_I(i, u8) {
+#if defined(HOST_WORDS_BIGENDIAN)
+        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
+#else
+        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
+#endif
+    }
+
+    r->u64[0] = t[0];
+    r->u64[1] = t[1];
+}
+
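+/*
+ * Polynomial (carry-less) multiply-sum: each pair of source elements is
+ * multiplied over GF(2) -- shift and XOR instead of shift and add --
+ * and adjacent products are XORed into one double-width target element.
+ */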
+#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
+{                                                             \
+    int i, j;                                                 \
+    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
+                                                              \
+    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
+        prod[i] = 0;                                          \
+        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
+            if (a->srcfld[i] & (1ull<<j)) {                   \
+                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
+            }                                                 \
+        }                                                     \
+    }                                                         \
+                                                              \
+    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
+        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
+    }                                                         \
+}
+
+PMSUM(vpmsumb, u8, u16, uint16_t)
+PMSUM(vpmsumh, u16, u32, uint32_t)
+PMSUM(vpmsumw, u32, u64, uint64_t)
+
+void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+#ifdef CONFIG_INT128
+    int i, j;
+    __uint128_t prod[2];
+
+    VECTOR_FOR_INORDER_I(i, u64) {
+        prod[i] = 0;
+        for (j = 0; j < 64; j++) {
+            if (a->u64[i] & (1ull<<j)) {
+                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
+            }
+        }
+    }
+
+    r->u128 = prod[0] ^ prod[1];
+
+#else
+    int i, j;
+    ppc_avr_t prod[2];
+
+    VECTOR_FOR_INORDER_I(i, u64) {
+        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
+        for (j = 0; j < 64; j++) {
+            if (a->u64[i] & (1ull<<j)) {
+                ppc_avr_t bshift;
+                if (j == 0) {
+                    bshift.u64[HI_IDX] = 0;
+                    bshift.u64[LO_IDX] = b->u64[i];
+                } else {
+                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
+                    bshift.u64[LO_IDX] = b->u64[i] << j;
+                }
+                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
+                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
+            }
+        }
+    }
+
+    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
+    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
+#endif
+}
+
+#if defined(HOST_WORDS_BIGENDIAN)
 #define PKBIG 1
 #else
 #define PKBIG 0
@@ -948,10 +1472,14 @@ VPK(shss, s16, s8, cvtshsb, 1)
 VPK(shus, s16, u8, cvtshub, 1)
 VPK(swss, s32, s16, cvtswsh, 1)
 VPK(swus, s32, u16, cvtswuh, 1)
+VPK(sdss, s64, s32, cvtsdsw, 1)
+VPK(sdus, s64, u32, cvtsduw, 1)
 VPK(uhus, u16, u8, cvtuhub, 1)
 VPK(uwus, u32, u16, cvtuwuh, 1)
+VPK(udus, u64, u32, cvtuduw, 1)
 VPK(uhum, u16, u8, I, 0)
 VPK(uwum, u32, u16, I, 0)
+VPK(udum, u64, u32, I, 0)
 #undef I
 #undef VPK
 #undef PKBIG
@@ -983,23 +1511,21 @@ VRFI(p, float_round_up)
 VRFI(z, float_round_to_zero)
 #undef VRFI
 
-#define VROTATE(suffix, element)                                        \
+#define VROTATE(suffix, element, mask)                                  \
     void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
     {                                                                   \
         int i;                                                          \
                                                                         \
         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            unsigned int mask = ((1 <<                                  \
-                                  (3 + (sizeof(a->element[0]) >> 1)))   \
-                                 - 1);                                  \
             unsigned int shift = b->element[i] & mask;                  \
             r->element[i] = (a->element[i] << shift) |                  \
                 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
         }                                                               \
     }
-VROTATE(b, u8)
-VROTATE(h, u16)
-VROTATE(w, u32)
+VROTATE(b, u8, 0x7)
+VROTATE(h, u16, 0xF)
+VROTATE(w, u32, 0x1F)
+VROTATE(d, u64, 0x3F)
 #undef VROTATE
 
 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
@@ -1080,23 +1606,21 @@ VSHIFT(r, RIGHT)
 #undef LEFT
 #undef RIGHT
 
-#define VSL(suffix, element)                                            \
+#define VSL(suffix, element, mask)                                      \
     void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
     {                                                                   \
         int i;                                                          \
                                                                         \
         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            unsigned int mask = ((1 <<                                  \
-                                  (3 + (sizeof(a->element[0]) >> 1)))   \
-                                 - 1);                                  \
             unsigned int shift = b->element[i] & mask;                  \
                                                                         \
             r->element[i] = a->element[i] << shift;                     \
         }                                                               \
     }
-VSL(b, u8)
-VSL(h, u16)
-VSL(w, u32)
+VSL(b, u8, 0x7)
+VSL(h, u16, 0x0F)
+VSL(w, u32, 0x1F)
+VSL(d, u64, 0x3F)
 #undef VSL
 
 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
@@ -1180,26 +1704,24 @@ VSPLTI(h, s16, int16_t)
 VSPLTI(w, s32, int32_t)
 #undef VSPLTI
 
-#define VSR(suffix, element)                                            \
+#define VSR(suffix, element, mask)                                      \
     void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
     {                                                                   \
         int i;                                                          \
                                                                         \
         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            unsigned int mask = ((1 <<                                  \
-                                  (3 + (sizeof(a->element[0]) >> 1)))   \
-                                 - 1);                                  \
             unsigned int shift = b->element[i] & mask;                  \
-                                                                        \
             r->element[i] = a->element[i] >> shift;                     \
         }                                                               \
     }
-VSR(ab, s8)
-VSR(ah, s16)
-VSR(aw, s32)
-VSR(b, u8)
-VSR(h, u16)
-VSR(w, u32)
+VSR(ab, s8, 0x7)
+VSR(ah, s16, 0xF)
+VSR(aw, s32, 0x1F)
+VSR(ad, s64, 0x3F)
+VSR(b, u8, 0x7)
+VSR(h, u16, 0xF)
+VSR(w, u32, 0x1F)
+VSR(d, u64, 0x3F)
 #undef VSR
 
 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
@@ -1379,12 +1901,819 @@ VUPKPX(hpx, UPKHI)
     }
 VUPK(hsb, s16, s8, UPKHI)
 VUPK(hsh, s32, s16, UPKHI)
+VUPK(hsw, s64, s32, UPKHI)
 VUPK(lsb, s16, s8, UPKLO)
 VUPK(lsh, s32, s16, UPKLO)
+VUPK(lsw, s64, s32, UPKLO)
 #undef VUPK
 #undef UPKHI
 #undef UPKLO
 
+#define VGENERIC_DO(name, element)                                      \
+    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
+    {                                                                   \
+        int i;                                                          \
+                                                                        \
+        VECTOR_FOR_INORDER_I(i, element) {                              \
+            r->element[i] = name(b->element[i]);                        \
+        }                                                               \
+    }
+
+#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
+#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
+#define clzw(v) clz32((v))
+#define clzd(v) clz64((v))
+
+VGENERIC_DO(clzb, u8)
+VGENERIC_DO(clzh, u16)
+VGENERIC_DO(clzw, u32)
+VGENERIC_DO(clzd, u64)
+
+#undef clzb
+#undef clzh
+#undef clzw
+#undef clzd
+
+#define popcntb(v) ctpop8(v)
+#define popcnth(v) ctpop16(v)
+#define popcntw(v) ctpop32(v)
+#define popcntd(v) ctpop64(v)
+
+VGENERIC_DO(popcntb, u8)
+VGENERIC_DO(popcnth, u16)
+VGENERIC_DO(popcntw, u32)
+VGENERIC_DO(popcntd, u64)
+
+#undef popcntb
+#undef popcnth
+#undef popcntw
+#undef popcntd
+
+#undef VGENERIC_DO
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define QW_ONE { .u64 = { 0, 1 } }
+#else
+#define QW_ONE { .u64 = { 1, 0 } }
+#endif
+
+#ifndef CONFIG_INT128
+
+static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
+{
+    t->u64[0] = ~a.u64[0];
+    t->u64[1] = ~a.u64[1];
+}
+
+static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
+{
+    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
+        return -1;
+    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
+        return 1;
+    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
+        return -1;
+    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
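+/*
+ * 128-bit add over two 64-bit halves: the low doubleword carries out
+ * exactly when b.lo > ~a.lo, i.e. when a.lo + b.lo wraps past 2^64 - 1.
+ */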
+static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
+{
+    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
+    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
+                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
+}
+
+static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
+{
+    ppc_avr_t not_a;
+    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
+    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
+                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
+    avr_qw_not(&not_a, a);
+    return avr_qw_cmpu(not_a, b) < 0;
+}
+
+#endif
+
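+/*
+ * Quadword (128-bit) integer add and subtract, with carry and extended
+ * variants; native __int128 is used when available, otherwise the
+ * avr_qw_* helpers above emulate it on two 64-bit halves.
+ */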
+void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+#ifdef CONFIG_INT128
+    r->u128 = a->u128 + b->u128;
+#else
+    avr_qw_add(r, *a, *b);
+#endif
+}
+
+void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+#ifdef CONFIG_INT128
+    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
+#else
+
+    if (c->u64[LO_IDX] & 1) {
+        ppc_avr_t tmp;
+
+        tmp.u64[HI_IDX] = 0;
+        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
+        avr_qw_add(&tmp, *a, tmp);
+        avr_qw_add(r, tmp, *b);
+    } else {
+        avr_qw_add(r, *a, *b);
+    }
+#endif
+}
+
+void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+#ifdef CONFIG_INT128
+    r->u128 = (~a->u128 < b->u128);
+#else
+    ppc_avr_t not_a;
+
+    avr_qw_not(&not_a, *a);
+
+    r->u64[HI_IDX] = 0;
+    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
+#endif
+}
+
+void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+#ifdef CONFIG_INT128
+    int carry_out = (~a->u128 < b->u128);
+    if (!carry_out && (c->u128 & 1)) {
+        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
+                    ((a->u128 != 0) || (b->u128 != 0));
+    }
+    r->u128 = carry_out;
+#else
+
+    int carry_in = c->u64[LO_IDX] & 1;
+    int carry_out = 0;
+    ppc_avr_t tmp;
+
+    carry_out = avr_qw_addc(&tmp, *a, *b);
+
+    if (!carry_out && carry_in) {
+        ppc_avr_t one = QW_ONE;
+        carry_out = avr_qw_addc(&tmp, tmp, one);
+    }
+    r->u64[HI_IDX] = 0;
+    r->u64[LO_IDX] = carry_out;
+#endif
+}
+
+void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+#ifdef CONFIG_INT128
+    r->u128 = a->u128 - b->u128;
+#else
+    ppc_avr_t tmp;
+    ppc_avr_t one = QW_ONE;
+
+    avr_qw_not(&tmp, *b);
+    avr_qw_add(&tmp, *a, tmp);
+    avr_qw_add(r, tmp, one);
+#endif
+}
+
+void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+#ifdef CONFIG_INT128
+    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
+#else
+    ppc_avr_t tmp, sum;
+
+    avr_qw_not(&tmp, *b);
+    avr_qw_add(&sum, *a, tmp);
+
+    tmp.u64[HI_IDX] = 0;
+    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
+    avr_qw_add(r, sum, tmp);
+#endif
+}
+
+void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+#ifdef CONFIG_INT128
+    r->u128 = (~a->u128 < ~b->u128) ||
+                 (a->u128 + ~b->u128 == (__uint128_t)-1);
+#else
+    int carry = (avr_qw_cmpu(*a, *b) > 0);
+    if (!carry) {
+        ppc_avr_t tmp;
+        avr_qw_not(&tmp, *b);
+        avr_qw_add(&tmp, *a, tmp);
+        carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
+    }
+    r->u64[HI_IDX] = 0;
+    r->u64[LO_IDX] = carry;
+#endif
+}
+
+void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+#ifdef CONFIG_INT128
+    r->u128 =
+        (~a->u128 < ~b->u128) ||
+        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
+#else
+    int carry_in = c->u64[LO_IDX] & 1;
+    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
+    if (!carry_out && carry_in) {
+        ppc_avr_t tmp;
+        avr_qw_not(&tmp, *b);
+        avr_qw_add(&tmp, *a, tmp);
+        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
+    }
+
+    r->u64[HI_IDX] = 0;
+    r->u64[LO_IDX] = carry_out;
+#endif
+}
+
+#define BCD_PLUS_PREF_1 0xC
+#define BCD_PLUS_PREF_2 0xF
+#define BCD_PLUS_ALT_1  0xA
+#define BCD_NEG_PREF    0xD
+#define BCD_NEG_ALT     0xB
+#define BCD_PLUS_ALT_2  0xE
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define BCD_DIG_BYTE(n) (15 - (n/2))
+#else
+#define BCD_DIG_BYTE(n) (n/2)
+#endif
+
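+/*
+ * A signed packed-decimal quantity holds 31 BCD digits plus a sign code
+ * in 128 bits: nibble 0 is the sign, nibbles 1 through 31 are the
+ * digits, least significant digit first.
+ */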
+static int bcd_get_sgn(ppc_avr_t *bcd)
+{
+    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
+    case BCD_PLUS_PREF_1:
+    case BCD_PLUS_PREF_2:
+    case BCD_PLUS_ALT_1:
+    case BCD_PLUS_ALT_2:
+        return 1;
+
+    case BCD_NEG_PREF:
+    case BCD_NEG_ALT:
+        return -1;
+
+    default:
+        return 0;
+    }
+}
+
+static int bcd_preferred_sgn(int sgn, int ps)
+{
+    if (sgn >= 0) {
+        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
+    } else {
+        return BCD_NEG_PREF;
+    }
+}
+
+static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
+{
+    uint8_t result;
+    if (n & 1) {
+        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
+    } else {
+        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
+    }
+
+    if (unlikely(result > 9)) {
+        *invalid = true;
+    }
+    return result;
+}
+
+static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
+{
+    if (n & 1) {
+        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
+        bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
+    } else {
+        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
+        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
+    }
+}
+
+static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+    int invalid = 0;
+    for (i = 31; i > 0; i--) {
+        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
+        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
+        if (unlikely(invalid)) {
+            return 0; /* doesn't matter */
+        } else if (dig_a > dig_b) {
+            return 1;
+        } else if (dig_a < dig_b) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
+                       int *overflow)
+{
+    int carry = 0;
+    int i;
+    int is_zero = 1;
+    for (i = 1; i <= 31; i++) {
+        uint8_t digit = bcd_get_digit(a, i, invalid) +
+                        bcd_get_digit(b, i, invalid) + carry;
+        is_zero &= (digit == 0);
+        if (digit > 9) {
+            carry = 1;
+            digit -= 10;
+        } else {
+            carry = 0;
+        }
+
+        bcd_put_digit(t, digit, i);
+
+        if (unlikely(*invalid)) {
+            return -1;
+        }
+    }
+
+    *overflow = carry;
+    return is_zero;
+}
+
+static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
+                       int *overflow)
+{
+    int carry = 0;
+    int i;
+    int is_zero = 1;
+    for (i = 1; i <= 31; i++) {
+        uint8_t digit = bcd_get_digit(a, i, invalid) -
+                        bcd_get_digit(b, i, invalid) + carry;
+        is_zero &= (digit == 0);
+        if (digit & 0x80) {
+            carry = -1;
+            digit += 10;
+        } else {
+            carry = 0;
+        }
+
+        bcd_put_digit(t, digit, i);
+
+        if (unlikely(*invalid)) {
+            return -1;
+        }
+    }
+
+    *overflow = carry;
+    return is_zero;
+}
+
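+/*
+ * bcdadd and bcdsub return the CR6 field value: 8 (LT) for a negative
+ * result, 4 (GT) for a positive result, 2 (EQ) for zero; bit 0 (the SO
+ * position, value 1) flags overflow or an invalid operand encoding.
+ */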
+uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
+{
+    int sgna = bcd_get_sgn(a);
+    int sgnb = bcd_get_sgn(b);
+    int invalid = (sgna == 0) || (sgnb == 0);
+    int overflow = 0;
+    int zero = 0;
+    uint32_t cr = 0;
+    ppc_avr_t result = { .u64 = { 0, 0 } };
+
+    if (!invalid) {
+        if (sgna == sgnb) {
+            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
+            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
+            cr = (sgna > 0) ? 4 : 8;
+        } else if (bcd_cmp_mag(a, b) > 0) {
+            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
+            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
+            cr = (sgna > 0) ? 4 : 8;
+        } else {
+            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
+            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
+            cr = (sgnb > 0) ? 4 : 8;
+        }
+    }
+
+    if (unlikely(invalid)) {
+        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
+        cr = 1;
+    } else if (overflow) {
+        cr |= 1;
+    } else if (zero) {
+        cr = 2;
+    }
+
+    *r = result;
+
+    return cr;
+}
+
+uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
+{
+    ppc_avr_t bcopy = *b;
+    int sgnb = bcd_get_sgn(b);
+    if (sgnb < 0) {
+        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
+    } else if (sgnb > 0) {
+        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
+    }
+    /* else invalid ... defer to bcdadd code for proper handling */
+
+    return helper_bcdadd(r, a, &bcopy, ps);
+}
+
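+/* AES forward S-box (FIPS-197). */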
+static const uint8_t SBOX[256] = {
+0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
+0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
+0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
+0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
+0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
+0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
+0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
+0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
+0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
+0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
+0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
+};
+
+static void SubBytes(ppc_avr_t *r, ppc_avr_t *a)
+{
+    int i;
+    VECTOR_FOR_INORDER_I(i, u8) {
+        r->u8[i] = SBOX[a->u8[i]];
+    }
+}
+
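+/* AES inverse S-box (FIPS-197). */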
+static const uint8_t InvSBOX[256] = {
+0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
+0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
+0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
+0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
+0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
+0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
+0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
+0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
+0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
+0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
+0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
+0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
+0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
+0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
+0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
+0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
+0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
+0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
+0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
+0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
+0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
+0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
+};
+
+static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a)
+{
+    int i;
+    VECTOR_FOR_INORDER_I(i, u8) {
+        r->u8[i] = InvSBOX[a->u8[i]];
+    }
+}
+
+static uint8_t ROTL8(uint8_t x, int n)
+{
+    return (x << n) | (x >> (8-n));
+}
+
+static inline int BIT8(uint8_t x, int n)
+{
+    return (x & (0x80 >> n)) != 0;
+}
+
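+/*
+ * Multiplication by small constants in GF(2^8) with the AES reduction
+ * polynomial x^8 + x^4 + x^3 + x + 1 (0x11B).  When the top bit of x is
+ * set, ROTL8(x, 1) equals ((x << 1) & 0xFF) | 1, so XORing in 0x1A
+ * yields (x << 1) ^ 0x1B, i.e. multiplication by x with reduction.
+ */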
+static uint8_t GFx02(uint8_t x)
+{
+    return ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
+}
+
+static uint8_t GFx03(uint8_t x)
+{
+    return x ^ ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
+}
+
+static uint8_t GFx09(uint8_t x)
+{
+    uint8_t term2 = ROTL8(x, 3);
+    uint8_t term3 = (BIT8(x, 0) ? 0x68 : 0) | (BIT8(x, 1) ? 0x14 : 0) |
+                    (BIT8(x, 2) ? 0x02 : 0);
+    uint8_t term4 = (BIT8(x, 1) ? 0x20 : 0) | (BIT8(x, 2) ? 0x18 : 0);
+    return x ^ term2 ^ term3 ^ term4;
+}
+
+static uint8_t GFx0B(uint8_t x)
+{
+    uint8_t term2 = ROTL8(x, 1);
+    uint8_t term3 = (x << 3) | (BIT8(x, 0) ? 0x06 : 0) |
+                    (BIT8(x, 2) ? 0x01 : 0);
+    uint8_t term4 = (BIT8(x, 0) ? 0x70 : 0) | (BIT8(x, 1) ? 0x06 : 0) |
+                    (BIT8(x, 2) ? 0x08 : 0);
+    uint8_t term5 = (BIT8(x, 1) ? 0x30 : 0) | (BIT8(x, 2) ? 0x02 : 0);
+    uint8_t term6 = BIT8(x, 2) ? 0x10 : 0;
+    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
+}
+
+static uint8_t GFx0D(uint8_t x)
+{
+    uint8_t term2 = ROTL8(x, 2);
+    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
+                    (BIT8(x, 2) ? 0x03 : 0);
+    uint8_t term4 = (BIT8(x, 0) ? 0x58 : 0) | (BIT8(x, 1) ? 0x20 : 0);
+    uint8_t term5 = (BIT8(x, 1) ? 0x08 : 0) | (BIT8(x, 2) ? 0x10 : 0);
+    uint8_t term6 = BIT8(x, 2) ? 0x08 : 0;
+    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
+}
+
+static uint8_t GFx0E(uint8_t x)
+{
+    uint8_t term1 = ROTL8(x, 1);
+    uint8_t term2 = (x << 2) | (BIT8(x, 2) ? 0x02 : 0) |
+                    (BIT8(x, 1) ? 0x01 : 0);
+    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
+                    (BIT8(x, 2) ? 0x01 : 0);
+    uint8_t term4 = (BIT8(x, 0) ? 0x40 : 0) | (BIT8(x, 1) ? 0x28 : 0) |
+                    (BIT8(x, 2) ? 0x10 : 0);
+    uint8_t term5 = (BIT8(x, 2) ? 0x08 : 0);
+    return term1 ^ term2 ^ term3 ^ term4 ^ term5;
+}
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define MCB(x, i, b) ((x)->u8[(i)*4 + (b)])
+#else
+#define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))])
+#endif
+
+static void MixColumns(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int i;
+    for (i = 0; i < 4; i++) {
+        MCB(r, i, 0) = GFx02(MCB(x, i, 0)) ^ GFx03(MCB(x, i, 1)) ^
+                       MCB(x, i, 2) ^ MCB(x, i, 3);
+        MCB(r, i, 1) = MCB(x, i, 0) ^ GFx02(MCB(x, i, 1)) ^
+                       GFx03(MCB(x, i, 2)) ^ MCB(x, i, 3);
+        MCB(r, i, 2) = MCB(x, i, 0) ^ MCB(x, i, 1) ^
+                       GFx02(MCB(x, i, 2)) ^ GFx03(MCB(x, i, 3));
+        MCB(r, i, 3) = GFx03(MCB(x, i, 0)) ^ MCB(x, i, 1) ^
+                       MCB(x, i, 2) ^ GFx02(MCB(x, i, 3));
+    }
+}
+
+static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int i;
+    for (i = 0; i < 4; i++) {
+        MCB(r, i, 0) = GFx0E(MCB(x, i, 0)) ^ GFx0B(MCB(x, i, 1)) ^
+                       GFx0D(MCB(x, i, 2)) ^ GFx09(MCB(x, i, 3));
+        MCB(r, i, 1) = GFx09(MCB(x, i, 0)) ^ GFx0E(MCB(x, i, 1)) ^
+                       GFx0B(MCB(x, i, 2)) ^ GFx0D(MCB(x, i, 3));
+        MCB(r, i, 2) = GFx0D(MCB(x, i, 0)) ^ GFx09(MCB(x, i, 1)) ^
+                       GFx0E(MCB(x, i, 2)) ^ GFx0B(MCB(x, i, 3));
+        MCB(r, i, 3) = GFx0B(MCB(x, i, 0)) ^ GFx0D(MCB(x, i, 1)) ^
+                       GFx09(MCB(x, i, 2)) ^ GFx0E(MCB(x, i, 3));
+    }
+}
+
+static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x)
+{
+    MCB(r, 0, 0) = MCB(x, 0, 0);
+    MCB(r, 1, 0) = MCB(x, 1, 0);
+    MCB(r, 2, 0) = MCB(x, 2, 0);
+    MCB(r, 3, 0) = MCB(x, 3, 0);
+
+    MCB(r, 0, 1) = MCB(x, 1, 1);
+    MCB(r, 1, 1) = MCB(x, 2, 1);
+    MCB(r, 2, 1) = MCB(x, 3, 1);
+    MCB(r, 3, 1) = MCB(x, 0, 1);
+
+    MCB(r, 0, 2) = MCB(x, 2, 2);
+    MCB(r, 1, 2) = MCB(x, 3, 2);
+    MCB(r, 2, 2) = MCB(x, 0, 2);
+    MCB(r, 3, 2) = MCB(x, 1, 2);
+
+    MCB(r, 0, 3) = MCB(x, 3, 3);
+    MCB(r, 1, 3) = MCB(x, 0, 3);
+    MCB(r, 2, 3) = MCB(x, 1, 3);
+    MCB(r, 3, 3) = MCB(x, 2, 3);
+}
+
+static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x)
+{
+    MCB(r, 0, 0) = MCB(x, 0, 0);
+    MCB(r, 1, 0) = MCB(x, 1, 0);
+    MCB(r, 2, 0) = MCB(x, 2, 0);
+    MCB(r, 3, 0) = MCB(x, 3, 0);
+
+    MCB(r, 0, 1) = MCB(x, 3, 1);
+    MCB(r, 1, 1) = MCB(x, 0, 1);
+    MCB(r, 2, 1) = MCB(x, 1, 1);
+    MCB(r, 3, 1) = MCB(x, 2, 1);
+
+    MCB(r, 0, 2) = MCB(x, 2, 2);
+    MCB(r, 1, 2) = MCB(x, 3, 2);
+    MCB(r, 2, 2) = MCB(x, 0, 2);
+    MCB(r, 3, 2) = MCB(x, 1, 2);
+
+    MCB(r, 0, 3) = MCB(x, 1, 3);
+    MCB(r, 1, 3) = MCB(x, 2, 3);
+    MCB(r, 2, 3) = MCB(x, 3, 3);
+    MCB(r, 3, 3) = MCB(x, 0, 3);
+}
+
+#undef MCB
+
+void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
+{
+    SubBytes(r, a);
+}
+
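+/*
+ * One full AES encryption round: SubBytes, ShiftRows, MixColumns,
+ * then AddRoundKey (XOR with the round key in b).
+ */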
+void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t vtemp1, vtemp2, vtemp3;
+    SubBytes(&vtemp1, a);
+    ShiftRows(&vtemp2, &vtemp1);
+    MixColumns(&vtemp3, &vtemp2);
+    r->u64[0] = vtemp3.u64[0] ^ b->u64[0];
+    r->u64[1] = vtemp3.u64[1] ^ b->u64[1];
+}
+
+void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t vtemp1, vtemp2;
+    SubBytes(&vtemp1, a);
+    ShiftRows(&vtemp2, &vtemp1);
+    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
+    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
+}
+
+void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    /*
+     * This differs from what is written in ISA V2.07.  The RTL is
+     * incorrect and will be fixed in V2.07B.
+     */
+    ppc_avr_t vtemp1, vtemp2, vtemp3;
+    InvShiftRows(&vtemp1, a);
+    InvSubBytes(&vtemp2, &vtemp1);
+    vtemp3.u64[0] = vtemp2.u64[0] ^ b->u64[0];
+    vtemp3.u64[1] = vtemp2.u64[1] ^ b->u64[1];
+    InvMixColumns(r, &vtemp3);
+}
+
+void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t vtemp1, vtemp2;
+    InvShiftRows(&vtemp1, a);
+    InvSubBytes(&vtemp2, &vtemp1);
+    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
+    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
+}
+
+#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
+#if defined(HOST_WORDS_BIGENDIAN)
+#define EL_IDX(i) (i)
+#else
+#define EL_IDX(i) (3 - (i))
+#endif
+
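+/*
+ * vshasigmaw: per-word SHA-256 sigma functions.  st selects the
+ * lower-case sigma functions (message schedule) versus the upper-case
+ * Sigma functions (compression), and bit i of the 4-bit six field
+ * (0x8 >> i) picks sigma1/Sigma1 instead of sigma0/Sigma0 for word i.
+ */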
+void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
+{
+    int st = (st_six & 0x10) != 0;
+    int six = st_six & 0xF;
+    int i;
+
+    VECTOR_FOR_INORDER_I(i, u32) {
+        if (st == 0) {
+            if ((six & (0x8 >> i)) == 0) {
+                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
+                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
+                                    (a->u32[EL_IDX(i)] >> 3);
+            } else { /* six.bit[i] == 1 */
+                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
+                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
+                                    (a->u32[EL_IDX(i)] >> 10);
+            }
+        } else { /* st == 1 */
+            if ((six & (0x8 >> i)) == 0) {
+                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
+                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
+                                    ROTRu32(a->u32[EL_IDX(i)], 22);
+            } else { /* six.bit[i] == 1 */
+                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
+                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
+                                    ROTRu32(a->u32[EL_IDX(i)], 25);
+            }
+        }
+    }
+}
+
+#undef ROTRu32
+#undef EL_IDX
+
+#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
+#if defined(HOST_WORDS_BIGENDIAN)
+#define EL_IDX(i) (i)
+#else
+#define EL_IDX(i) (1 - (i))
+#endif
+
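+/*
+ * vshasigmad: per-doubleword SHA-512 sigma functions, selected as in
+ * vshasigmaw but with the SHA-512 rotation and shift amounts.
+ */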
+void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
+{
+    int st = (st_six & 0x10) != 0;
+    int six = st_six & 0xF;
+    int i;
+
+    VECTOR_FOR_INORDER_I(i, u64) {
+        if (st == 0) {
+            if ((six & (0x8 >> (2*i))) == 0) {
+                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
+                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
+                                    (a->u64[EL_IDX(i)] >> 7);
+            } else { /* six.bit[2*i] == 1 */
+                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
+                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
+                                    (a->u64[EL_IDX(i)] >> 6);
+            }
+        } else { /* st == 1 */
+            if ((six & (0x8 >> (2*i))) == 0) {
+                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
+                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
+                                    ROTRu64(a->u64[EL_IDX(i)], 39);
+            } else { /* six.bit[2*i] == 1 */
+                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
+                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
+                                    ROTRu64(a->u64[EL_IDX(i)], 41);
+            }
+        }
+    }
+}
+
+#undef ROTRu64
+#undef EL_IDX
+
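+/*
+ * vpermxor: the high and low nibbles of each control byte in c select
+ * one byte of a and one byte of b respectively; r receives their XOR.
+ */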
+void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+    int i;
+    VECTOR_FOR_INORDER_I(i, u8) {
+        int indexA = c->u8[i] >> 4;
+        int indexB = c->u8[i] & 0xF;
+#if defined(HOST_WORDS_BIGENDIAN)
+        r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
+#else
+        r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
+#endif
+    }
+}
+
 #undef VECTOR_FOR_INORDER_I
 #undef HI_IDX
 #undef LO_IDX
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 7af3fe277d..32e7a8c0a7 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -36,6 +36,7 @@
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
 #include "sysemu/watchdog.h"
+#include "trace.h"
 
 //#define DEBUG_KVM
 
@@ -401,7 +402,7 @@ static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 
 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 {
-    return cpu->cpu_index;
+    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 }
 
 int kvm_arch_init_vcpu(CPUState *cs)
@@ -480,8 +481,7 @@ static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret != 0) {
-        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
-                spr, strerror(errno));
+        trace_kvm_failed_spr_get(spr, strerror(errno));
     } else {
         switch (id & KVM_REG_SIZE_MASK) {
         case KVM_REG_SIZE_U32:
@@ -529,8 +529,7 @@ static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 
     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
     if (ret != 0) {
-        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
-                spr, strerror(errno));
+        trace_kvm_failed_spr_set(spr, strerror(errno));
     }
 }
 
@@ -820,6 +819,9 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 #ifdef TARGET_PPC64
         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
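+            /*
+             * Fold the slot number into valid slbe values; the
+             * slbmte-style encoding used on the KVM side carries the
+             * SLB index in the low bits of the ESID doubleword.
+             */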
+            if (env->slb[i].esid & SLB_ESID_V) {
+                sregs.u.s.ppc64.slb[i].slbe |= i;
+            }
             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
         }
 #endif
@@ -1029,7 +1031,9 @@ int kvm_arch_get_registers(CPUState *cs)
             return ret;
         }
 
-        ppc_store_sdr1(env, sregs.u.s.sdr1);
+        if (!env->external_htab) {
+            ppc_store_sdr1(env, sregs.u.s.sdr1);
+        }
 
         /* Sync SLB */
 #ifdef TARGET_PPC64
@@ -1766,22 +1770,14 @@ static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
     }
 }
 
-int kvmppc_fixup_cpu(PowerPCCPU *cpu)
+bool kvmppc_has_cap_epr(void)
 {
-    CPUState *cs = CPU(cpu);
-    int smt;
-
-    /* Adjust cpu index for SMT */
-    smt = kvmppc_smt_threads();
-    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
-        + (cs->cpu_index % smp_threads);
-
-    return 0;
+    return cap_epr;
 }
 
-bool kvmppc_has_cap_epr(void)
+bool kvmppc_has_cap_htab_fd(void)
 {
-    return cap_epr;
+    return cap_htab_fd;
 }
 
 static int kvm_ppc_register_host_cpu_type(void)
@@ -1934,3 +1930,88 @@ void kvm_arch_remove_all_hw_breakpoints(void)
 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
 {
 }
+
+struct kvm_get_htab_buf {
+    struct kvm_get_htab_header header;
+    /*
+     * We require one extra entry beyond the PTEG itself for the read
+     */
+    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
+};
+
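+/*
+ * Fetch one PTEG through the KVM HTAB fd and return its hpte array as
+ * an opaque token, or 0 on failure.  The backing buffer must be
+ * released with kvmppc_hash64_free_pteg().
+ */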
+uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
+{
+    int htab_fd;
+    struct kvm_get_htab_fd ghf;
+    struct kvm_get_htab_buf  *hpte_buf;
+
+    ghf.flags = 0;
+    ghf.start_index = pte_index;
+    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+    if (htab_fd < 0) {
+        goto error_out;
+    }
+
+    hpte_buf = g_malloc0(sizeof(*hpte_buf));
+    /*
+     * Read the hpte group
+     */
+    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
+        goto out_close;
+    }
+
+    close(htab_fd);
+    return (uint64_t)(uintptr_t) hpte_buf->hpte;
+
+out_close:
+    g_free(hpte_buf);
+    close(htab_fd);
+error_out:
+    return 0;
+}
+
+void kvmppc_hash64_free_pteg(uint64_t token)
+{
+    struct kvm_get_htab_buf *htab_buf;
+
+    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
+                            hpte);
+    g_free(htab_buf);
+    return;
+}
+
+void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
+                             target_ulong pte0, target_ulong pte1)
+{
+    int htab_fd;
+    struct kvm_get_htab_fd ghf;
+    struct kvm_get_htab_buf hpte_buf;
+
+    ghf.flags = 0;
+    ghf.start_index = 0;     /* Ignored */
+    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+    if (htab_fd < 0) {
+        goto error_out;
+    }
+
+    hpte_buf.header.n_valid = 1;
+    hpte_buf.header.n_invalid = 0;
+    hpte_buf.header.index = pte_index;
+    hpte_buf.hpte[0] = pte0;
+    hpte_buf.hpte[1] = pte1;
+    /*
+     * Write the hpte entry.
+     * CAUTION: write() is declared with the warn_unused_result
+     * attribute, so the return value must be checked even though no
+     * recovery action is taken on failure.
+     */
+    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
+        goto out_close;
+    }
+
+out_close:
+    close(htab_fd);
+    return;
+
+error_out:
+    return;
+}
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 5f78e4be14..ff077ec502 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -36,13 +36,18 @@ int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
 #endif /* !CONFIG_USER_ONLY */
-int kvmppc_fixup_cpu(PowerPCCPU *cpu);
 bool kvmppc_has_cap_epr(void);
 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function);
+bool kvmppc_has_cap_htab_fd(void);
 int kvmppc_get_htab_fd(bool write);
 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                            uint16_t n_valid, uint16_t n_invalid);
+uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index);
+void kvmppc_hash64_free_pteg(uint64_t token);
+
+void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
+                             target_ulong pte0, target_ulong pte1);
 
 #else
 
@@ -155,11 +160,6 @@ static inline int kvmppc_update_sdr1(CPUPPCState *env)
 
 #endif /* !CONFIG_USER_ONLY */
 
-static inline int kvmppc_fixup_cpu(PowerPCCPU *cpu)
-{
-    return -1;
-}
-
 static inline bool kvmppc_has_cap_epr(void)
 {
     return false;
@@ -171,6 +171,11 @@ static inline int kvmppc_define_rtas_kernel_token(uint32_t token,
     return -1;
 }
 
+static inline bool kvmppc_has_cap_htab_fd(void)
+{
+    return false;
+}
+
 static inline int kvmppc_get_htab_fd(bool write)
 {
     return -1;
@@ -188,6 +193,24 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
     abort();
 }
 
+static inline uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu,
+                                               target_ulong pte_index)
+{
+    abort();
+}
+
+static inline void kvmppc_hash64_free_pteg(uint64_t token)
+{
+    abort();
+}
+
+static inline void kvmppc_hash64_write_pte(CPUPPCState *env,
+                                           target_ulong pte_index,
+                                           target_ulong pte0, target_ulong pte1)
+{
+    abort();
+}
+
 #endif
 
 #ifndef CONFIG_KVM
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index 12c174f7f3..2d46ceccca 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -70,7 +70,9 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
         qemu_get_betls(f, &env->pb[i]);
     for (i = 0; i < 1024; i++)
         qemu_get_betls(f, &env->spr[i]);
-    ppc_store_sdr1(env, sdr1);
+    if (!env->external_htab) {
+        ppc_store_sdr1(env, sdr1);
+    }
     qemu_get_be32s(f, &env->vscr);
     qemu_get_be64s(f, &env->spe_acc);
     qemu_get_be32s(f, &env->spe_fscr);
@@ -179,9 +181,10 @@ static int cpu_post_load(void *opaque, int version_id)
         env->IBAT[1][i+4] = env->spr[SPR_IBAT4U + 2*i + 1];
     }
 
-    /* Restore htab_base and htab_mask variables */
-    ppc_store_sdr1(env, env->spr[SPR_SDR1]);
-
+    if (!env->external_htab) {
+        /* Restore htab_base and htab_mask variables */
+        ppc_store_sdr1(env, env->spr[SPR_SDR1]);
+    }
     hreg_compute_hflags(env);
     hreg_compute_mem_idx(env);
 
diff --git a/target-ppc/misc_helper.c b/target-ppc/misc_helper.c
index 616aab6fb6..dc2ebfc452 100644
--- a/target-ppc/misc_helper.c
+++ b/target-ppc/misc_helper.c
@@ -38,7 +38,9 @@ void helper_store_dump_spr(CPUPPCState *env, uint32_t sprn)
 
 void helper_store_sdr1(CPUPPCState *env, target_ulong val)
 {
-    ppc_store_sdr1(env, val);
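+    /*
+     * With an external HTAB, htab_base and htab_mask are not derived
+     * from SDR1, so a guest store to SDR1 must not clobber them.
+     */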
+    if (!env->external_htab) {
+        ppc_store_sdr1(env, val);
+    }
 }
 
 void helper_store_hid0_601(CPUPPCState *env, target_ulong val)
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 67fc1b5dec..f2af4fbaa7 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -41,6 +41,11 @@
 #endif
 
 /*
+ * Used to indicate whether we have allocated htab in the
+ * host kernel
+ */
+bool kvmppc_kern_htab;
+/*
  * SLB handling
  */
 
@@ -278,12 +283,12 @@ static int ppc_hash64_pte_prot(CPUPPCState *env,
 static int ppc_hash64_amr_prot(CPUPPCState *env, ppc_hash_pte64_t pte)
 {
     int key, amrbits;
-    int prot = PAGE_EXEC;
+    int prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
 
 
     /* Only recent MMUs implement Virtual Page Class Key Protection */
     if (!(env->mmu_model & POWERPC_MMU_AMR)) {
-        return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        return prot;
     }
 
     key = HPTE64_R_KEY(pte.pte1);
@@ -292,39 +297,94 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, ppc_hash_pte64_t pte)
     /* fprintf(stderr, "AMR protection: key=%d AMR=0x%" PRIx64 "\n", key, */
     /*         env->spr[SPR_AMR]); */
 
+    /*
+     * A store is permitted if the AMR bit is 0. Remove write
+     * protection if it is set.
+     */
     if (amrbits & 0x2) {
-        prot |= PAGE_WRITE;
+        prot &= ~PAGE_WRITE;
     }
+    /*
+     * A load is permitted if the AMR bit is 0. Remove read
+     * protection if it is set.
+     */
     if (amrbits & 0x1) {
-        prot |= PAGE_READ;
+        prot &= ~PAGE_READ;
     }
 
     return prot;
 }
 
-static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off,
+uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index)
+{
+    uint64_t token = 0;
+    hwaddr pte_offset;
+
+    pte_offset = pte_index * HASH_PTE_SIZE_64;
+    if (kvmppc_kern_htab) {
+        /*
+         * HTAB is controlled by KVM. Fetch the PTEG into a new buffer.
+         */
+        token = kvmppc_hash64_read_pteg(cpu, pte_index);
+        if (token) {
+            return token;
+        }
+        /*
+         * The PTEG read failed, even though the HTAB was allocated in
+         * the kernel via kvmppc_reset_htab.
+         */
+        return 0;
+    }
+    /*
+     * HTAB is controlled by QEMU. Just point to the internally
+     * accessible PTEG.
+     */
+    if (cpu->env.external_htab) {
+        token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset;
+    } else if (cpu->env.htab_base) {
+        token = cpu->env.htab_base + pte_offset;
+    }
+    return token;
+}
+
+void ppc_hash64_stop_access(uint64_t token)
+{
+    if (kvmppc_kern_htab) {
+        return kvmppc_hash64_free_pteg(token);
+    }
+}
+
+static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
                                      bool secondary, target_ulong ptem,
                                      ppc_hash_pte64_t *pte)
 {
-    hwaddr pte_offset = pteg_off;
-    target_ulong pte0, pte1;
     int i;
+    uint64_t token;
+    target_ulong pte0, pte1;
+    target_ulong pte_index;
 
+    pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP;
+    token = ppc_hash64_start_access(ppc_env_get_cpu(env), pte_index);
+    if (!token) {
+        return -1;
+    }
     for (i = 0; i < HPTES_PER_GROUP; i++) {
-        pte0 = ppc_hash64_load_hpte0(env, pte_offset);
-        pte1 = ppc_hash64_load_hpte1(env, pte_offset);
+        pte0 = ppc_hash64_load_hpte0(env, token, i);
+        pte1 = ppc_hash64_load_hpte1(env, token, i);
 
         if ((pte0 & HPTE64_V_VALID)
             && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
             && HPTE64_V_COMPARE(pte0, ptem)) {
             pte->pte0 = pte0;
             pte->pte1 = pte1;
-            return pte_offset;
+            ppc_hash64_stop_access(token);
+            return (pte_index + i) * HASH_PTE_SIZE_64;
         }
-
-        pte_offset += HASH_PTE_SIZE_64;
     }
-
+    ppc_hash64_stop_access(token);
+    /*
+     * We didn't find a valid entry.
+     */
     return -1;
 }
 
@@ -332,7 +392,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
                                      ppc_slb_t *slb, target_ulong eaddr,
                                      ppc_hash_pte64_t *pte)
 {
-    hwaddr pteg_off, pte_offset;
+    hwaddr pte_offset;
     hwaddr hash;
     uint64_t vsid, epnshift, epnmask, epn, ptem;
 
@@ -367,8 +427,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
             " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
             " hash=" TARGET_FMT_plx "\n",
             env->htab_base, env->htab_mask, vsid, ptem,  hash);
-    pteg_off = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;
-    pte_offset = ppc_hash64_pteg_search(env, pteg_off, 0, ptem, pte);
+    pte_offset = ppc_hash64_pteg_search(env, hash, 0, ptem, pte);
 
     if (pte_offset == -1) {
         /* Secondary PTEG lookup */
@@ -377,8 +436,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
                 " hash=" TARGET_FMT_plx "\n", env->htab_base,
                 env->htab_mask, vsid, ptem, ~hash);
 
-        pteg_off = (~hash * HASH_PTEG_SIZE_64) & env->htab_mask;
-        pte_offset = ppc_hash64_pteg_search(env, pteg_off, 1, ptem, pte);
+        pte_offset = ppc_hash64_pteg_search(env, ~hash, 1, ptem, pte);
     }
 
     return pte_offset;
@@ -508,7 +566,8 @@ int ppc_hash64_handle_mmu_fault(CPUPPCState *env, target_ulong eaddr,
     }
 
     if (new_pte1 != pte.pte1) {
-        ppc_hash64_store_hpte1(env, pte_offset, new_pte1);
+        ppc_hash64_store_hpte(env, pte_offset / HASH_PTE_SIZE_64,
+                              pte.pte0, new_pte1);
     }
 
     /* 7. Determine the real address from the PTE */
@@ -544,3 +603,23 @@ hwaddr ppc_hash64_get_phys_page_debug(CPUPPCState *env, target_ulong addr)
 
     return ppc_hash64_pte_raddr(slb, pte, addr) & TARGET_PAGE_MASK;
 }
+
+void ppc_hash64_store_hpte(CPUPPCState *env,
+                           target_ulong pte_index,
+                           target_ulong pte0, target_ulong pte1)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+
+    if (kvmppc_kern_htab) {
+        return kvmppc_hash64_write_pte(env, pte_index, pte0, pte1);
+    }
+
+    pte_index *= HASH_PTE_SIZE_64;
+    if (env->external_htab) {
+        stq_p(env->external_htab + pte_index, pte0);
+        stq_p(env->external_htab + pte_index + HASH_PTE_SIZE_64/2, pte1);
+    } else {
+        stq_phys(cs->as, env->htab_base + pte_index, pte0);
+        stq_phys(cs->as, env->htab_base + pte_index + HASH_PTE_SIZE_64/2, pte1);
+    }
+}
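+
+/*
+ * Note that pte_index here is in units of whole PTEs, not bytes; the
+ * fault path above converts with pte_offset / HASH_PTE_SIZE_64 before
+ * calling in.
+ */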
diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h
index a8da558ca2..1746b3e2d3 100644
--- a/target-ppc/mmu-hash64.h
+++ b/target-ppc/mmu-hash64.h
@@ -9,6 +9,8 @@ int ppc_store_slb (CPUPPCState *env, target_ulong rb, target_ulong rs);
 hwaddr ppc_hash64_get_phys_page_debug(CPUPPCState *env, target_ulong addr);
 int ppc_hash64_handle_mmu_fault(CPUPPCState *env, target_ulong address, int rw,
                                 int mmu_idx);
+void ppc_hash64_store_hpte(CPUPPCState *env, target_ulong index,
+                           target_ulong pte0, target_ulong pte1);
 #endif
 
 /*
@@ -75,49 +77,34 @@ int ppc_hash64_handle_mmu_fault(CPUPPCState *env, target_ulong address, int rw,
 #define HPTE64_V_1TB_SEG        0x4000000000000000ULL
 #define HPTE64_V_VRMA_MASK      0x4001ffffff000000ULL
 
-static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env,
-                                                 hwaddr pte_offset)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-    if (env->external_htab) {
-        return  ldq_p(env->external_htab + pte_offset);
-    } else {
-        return ldq_phys(cs->as, env->htab_base + pte_offset);
-    }
-}
 
-static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env,
-                                                 hwaddr pte_offset)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-    if (env->external_htab) {
-        return ldq_p(env->external_htab + pte_offset + HASH_PTE_SIZE_64/2);
-    } else {
-        return ldq_phys(cs->as,
-                        env->htab_base + pte_offset + HASH_PTE_SIZE_64/2);
-    }
-}
+extern bool kvmppc_kern_htab;
+uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index);
+void ppc_hash64_stop_access(uint64_t token);
 
-static inline void ppc_hash64_store_hpte0(CPUPPCState *env,
-                                          hwaddr pte_offset, target_ulong pte0)
+static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env,
+                                                 uint64_t token, int index)
 {
     CPUState *cs = ENV_GET_CPU(env);
+    uint64_t addr;
+    addr = token + (index * HASH_PTE_SIZE_64);
     if (env->external_htab) {
-        stq_p(env->external_htab + pte_offset, pte0);
+        return ldq_p((const void *)(uintptr_t)addr);
     } else {
-        stq_phys(cs->as, env->htab_base + pte_offset, pte0);
+        return ldq_phys(cs->as, addr);
     }
 }
 
-static inline void ppc_hash64_store_hpte1(CPUPPCState *env,
-                                          hwaddr pte_offset, target_ulong pte1)
+static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env,
+                                                 uint64_t token, int index)
 {
     CPUState *cs = ENV_GET_CPU(env);
+    uint64_t addr;
+    addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2;
     if (env->external_htab) {
-        stq_p(env->external_htab + pte_offset + HASH_PTE_SIZE_64/2, pte1);
+        return ldq_p((const void *)(uintptr_t)addr);
     } else {
-        stq_phys(cs->as,
-                 env->htab_base + pte_offset + HASH_PTE_SIZE_64/2, pte1);
+        return ldq_phys(cs->as, addr);
     }
 }
 
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 04a840b016..8e2f8e736a 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -2014,6 +2014,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
 void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
 {
     LOG_MMU("%s: " TARGET_FMT_lx "\n", __func__, value);
+    assert(!env->external_htab);
     if (env->spr[SPR_SDR1] != value) {
         env->spr[SPR_SDR1] = value;
 #if defined(TARGET_PPC64)
@@ -2025,7 +2026,7 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
                         " stored in SDR1\n", htabsize);
                 htabsize = 28;
             }
-            env->htab_mask = (1ULL << (htabsize + 18)) - 1;
+            env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1;
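+            /*
+             * htab_mask is now in units of PTEGs (2^7 bytes each), not
+             * bytes: e.g. htabsize 0 means a 2^18-byte HTAB, i.e. 2^11
+             * groups, so the mask is 0x7ff.  The hash lookup code then
+             * multiplies a masked hash by HPTES_PER_GROUP to get a PTE
+             * index.
+             */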
             env->htab_base = value & SDR_64_HTABORG;
         } else
 #endif /* defined(TARGET_PPC64) */
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c5c1108e92..91c33dcd1d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -387,6 +387,8 @@ EXTRACT_HELPER(opc2, 1, 5);
 EXTRACT_HELPER(opc3, 6, 5);
 /* Update Cr0 flags */
 EXTRACT_HELPER(Rc, 0, 1);
+/* Update Cr6 flags (Altivec) */
+EXTRACT_HELPER(Rc21, 10, 1);
 /* Destination */
 EXTRACT_HELPER(rD, 21, 5);
 /* Source */
@@ -622,6 +624,20 @@ static opc_handler_t invalid_handler = {
     .handler = gen_invalid,
 };
 
+#if defined(TARGET_PPC64)
+/* NOTE: at this time, the only use of is_user_mode() is in 64-bit code.  It */
+/*       is therefore wrapped in the standard 64-bit ifdef in order to avoid */
+/*       compiler warnings in 32-bit builds.                                 */
+static bool is_user_mode(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+    return true;
+#else
+    return ctx->mem_idx == 0;
+#endif
+}
+#endif
+
 /***                           Integer comparison                          ***/
 
 static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
@@ -984,6 +1000,25 @@ GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1);
 /* divw  divw.  divwo  divwo.   */
 GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0);
 GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1);
+
+/* div[wd]e[u][o][.] */
+#define GEN_DIVE(name, hlpr, compute_ov)                                      \
+static void gen_##name(DisasContext *ctx)                                     \
+{                                                                             \
+    TCGv_i32 t0 = tcg_const_i32(compute_ov);                                  \
+    gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], cpu_env,                      \
+                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \
+    tcg_temp_free_i32(t0);                                                    \
+    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
+        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);                           \
+    }                                                                         \
+}
+
+GEN_DIVE(divweu, divweu, 0);
+GEN_DIVE(divweuo, divweu, 1);
+GEN_DIVE(divwe, divwe, 0);
+GEN_DIVE(divweo, divwe, 1);
+
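+/*
+ * Reference semantics for the word forms above, as a minimal sketch
+ * (divweu_ref is an illustrative name, not the actual helper):
+ *
+ *     uint32_t divweu_ref(uint32_t ra, uint32_t rb, bool *ov)
+ *     {
+ *         uint64_t dividend = (uint64_t)ra << 32;
+ *         *ov = (rb == 0) || (dividend / rb > UINT32_MAX);
+ *         return *ov ? 0 : (uint32_t)(dividend / rb);
+ *     }
+ *
+ * The architected result is undefined on overflow; 0 is used here only
+ * for definiteness.
+ */
+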
 #if defined(TARGET_PPC64)
 static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, int sign, int compute_ov)
@@ -1032,6 +1067,11 @@ GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1);
 /* divw  divw.  divwo  divwo.   */
 GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0);
 GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
+
+GEN_DIVE(divdeu, divdeu, 0);
+GEN_DIVE(divdeuo, divdeu, 1);
+GEN_DIVE(divde, divde, 0);
+GEN_DIVE(divdeo, divde, 1);
 #endif
 
 /* mulhw  mulhw. */
@@ -1525,6 +1565,15 @@ static void gen_prtyd(DisasContext *ctx)
 #endif
 
 #if defined(TARGET_PPC64)
+/* bpermd */
+static void gen_bpermd(DisasContext *ctx)
+{
+    gen_helper_bpermd(cpu_gpr[rA(ctx->opcode)],
+                      cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+}
+#endif
+
+#if defined(TARGET_PPC64)
 /* extsw & extsw. */
 GEN_LOGICAL1(extsw, tcg_gen_ext32s_tl, 0x1E, PPC_64B);
 
@@ -2169,17 +2218,31 @@ GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT);
 /***                     Floating-Point round & convert                    ***/
 /* fctiw */
 GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT);
+/* fctiwu */
+GEN_FLOAT_B(ctiwu, 0x0E, 0x04, 0, PPC2_FP_CVT_ISA206);
 /* fctiwz */
 GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT);
+/* fctiwuz */
+GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206);
 /* frsp */
 GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT);
 #if defined(TARGET_PPC64)
 /* fcfid */
 GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B);
+/* fcfids */
+GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206);
+/* fcfidu */
+GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206);
+/* fcfidus */
+GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206);
 /* fctid */
 GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B);
+/* fctidu */
+GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206);
 /* fctidz */
 GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_64B);
+/* fctiduz */
+GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206);
 #endif
 
 /* frin */
@@ -2191,6 +2254,27 @@ GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT);
 /* frim */
 GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
 
+static void gen_ftdiv(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+                     cpu_fpr[rB(ctx->opcode)]);
+}
+
+static void gen_ftsqrt(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
+}
+
 /***                         Floating-Point compare                        ***/
 
 /* fcmpo */
@@ -2294,6 +2378,32 @@ static void gen_fcpsgn(DisasContext *ctx)
     gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
 }
 
+static void gen_fmrgew(DisasContext *ctx)
+{
+    TCGv_i64 b0;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    b0 = tcg_temp_new_i64();
+    tcg_gen_shri_i64(b0, cpu_fpr[rB(ctx->opcode)], 32);
+    tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+                        b0, 0, 32);
+    tcg_temp_free_i64(b0);
+}
+
+static void gen_fmrgow(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)],
+                        cpu_fpr[rB(ctx->opcode)],
+                        cpu_fpr[rA(ctx->opcode)],
+                        32, 32);
+}
+
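+/*
+ * Plain-C sketch of the two merges above (this is what the deposit ops
+ * compute):
+ *
+ *     fmrgew: frt = (fra & 0xffffffff00000000ull) | (frb >> 32);
+ *     fmrgow: frt = (fra << 32) | (frb & 0xffffffffull);
+ */
+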
 /***                  Floating-Point status & ctrl register                ***/
 
 /* mcrfs */
@@ -2585,6 +2695,14 @@ static inline void gen_qemu_ld32s(DisasContext *ctx, TCGv arg1, TCGv arg2)
         tcg_gen_qemu_ld32s(arg1, arg2, ctx->mem_idx);
 }
 
+static void gen_qemu_ld32s_i64(DisasContext *ctx, TCGv_i64 val, TCGv addr)
+{
+    TCGv tmp = tcg_temp_new();
+    gen_qemu_ld32s(ctx, tmp, addr);
+    tcg_gen_ext_tl_i64(val, tmp);
+    tcg_temp_free(tmp);
+}
+
 static inline void gen_qemu_ld64(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
 {
     tcg_gen_qemu_ld64(arg1, arg2, ctx->mem_idx);
@@ -2756,36 +2874,44 @@ static void gen_ld(DisasContext *ctx)
 /* lq */
 static void gen_lq(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-#else
     int ra, rd;
     TCGv EA;
 
-    /* Restore CPU state */
-    if (unlikely(ctx->mem_idx == 0)) {
+    /* lq is a legal user-mode instruction starting in ISA 2.07 */
+    bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
+    bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
+
+    if (!legal_in_user_mode && is_user_mode(ctx)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
+
+    if (!le_is_supported && ctx->le_mode) {
+        gen_exception_err(ctx, POWERPC_EXCP_ALIGN, POWERPC_EXCP_ALIGN_LE);
+        return;
+    }
+
     ra = rA(ctx->opcode);
     rd = rD(ctx->opcode);
     if (unlikely((rd & 1) || rd == ra)) {
         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
         return;
     }
-    if (unlikely(ctx->le_mode)) {
-        /* Little-endian mode is not handled */
-        gen_exception_err(ctx, POWERPC_EXCP_ALIGN, POWERPC_EXCP_ALIGN_LE);
-        return;
-    }
+
     gen_set_access_type(ctx, ACCESS_INT);
     EA = tcg_temp_new();
     gen_addr_imm_index(ctx, EA, 0x0F);
-    gen_qemu_ld64(ctx, cpu_gpr[rd], EA);
-    gen_addr_add(ctx, EA, EA, 8);
-    gen_qemu_ld64(ctx, cpu_gpr[rd+1], EA);
+
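+    /*
+     * Register pairing, per ISA 2.07 (sketch): in BE mode rd receives
+     * the doubleword at EA and rd+1 the one at EA+8; LE mode swaps
+     * which register gets which doubleword.
+     */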
+    if (unlikely(ctx->le_mode)) {
+        gen_qemu_ld64(ctx, cpu_gpr[rd+1], EA);
+        gen_addr_add(ctx, EA, EA, 8);
+        gen_qemu_ld64(ctx, cpu_gpr[rd], EA);
+    } else {
+        gen_qemu_ld64(ctx, cpu_gpr[rd], EA);
+        gen_addr_add(ctx, EA, EA, 8);
+        gen_qemu_ld64(ctx, cpu_gpr[rd+1], EA);
+    }
     tcg_temp_free(EA);
-#endif
 }
 #endif
 
@@ -2871,34 +2997,41 @@ static void gen_std(DisasContext *ctx)
     TCGv EA;
 
     rs = rS(ctx->opcode);
-    if ((ctx->opcode & 0x3) == 0x2) {
-#if defined(CONFIG_USER_ONLY)
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-#else
-        /* stq */
-        if (unlikely(ctx->mem_idx == 0)) {
+    if ((ctx->opcode & 0x3) == 0x2) { /* stq */
+
+        bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
+        bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
+
+        if (!legal_in_user_mode && is_user_mode(ctx)) {
             gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
             return;
         }
-        if (unlikely(rs & 1)) {
-            gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+
+        if (!le_is_supported && ctx->le_mode) {
+            gen_exception_err(ctx, POWERPC_EXCP_ALIGN, POWERPC_EXCP_ALIGN_LE);
             return;
         }
-        if (unlikely(ctx->le_mode)) {
-            /* Little-endian mode is not handled */
-            gen_exception_err(ctx, POWERPC_EXCP_ALIGN, POWERPC_EXCP_ALIGN_LE);
+
+        if (unlikely(rs & 1)) {
+            gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
             return;
         }
         gen_set_access_type(ctx, ACCESS_INT);
         EA = tcg_temp_new();
         gen_addr_imm_index(ctx, EA, 0x03);
-        gen_qemu_st64(ctx, cpu_gpr[rs], EA);
-        gen_addr_add(ctx, EA, EA, 8);
-        gen_qemu_st64(ctx, cpu_gpr[rs+1], EA);
+
+        if (unlikely(ctx->le_mode)) {
+            gen_qemu_st64(ctx, cpu_gpr[rs+1], EA);
+            gen_addr_add(ctx, EA, EA, 8);
+            gen_qemu_st64(ctx, cpu_gpr[rs], EA);
+        } else {
+            gen_qemu_st64(ctx, cpu_gpr[rs], EA);
+            gen_addr_add(ctx, EA, EA, 8);
+            gen_qemu_st64(ctx, cpu_gpr[rs+1], EA);
+        }
         tcg_temp_free(EA);
-#endif
     } else {
-        /* std / stdu */
+        /* std / stdu */
         if (Rc(ctx->opcode)) {
             if (unlikely(rA(ctx->opcode) == 0)) {
                 gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
@@ -3140,24 +3273,32 @@ static void gen_isync(DisasContext *ctx)
     gen_stop_exception(ctx);
 }
 
-/* lwarx */
-static void gen_lwarx(DisasContext *ctx)
-{
-    TCGv t0;
-    TCGv gpr = cpu_gpr[rD(ctx->opcode)];
-    gen_set_access_type(ctx, ACCESS_RES);
-    t0 = tcg_temp_local_new();
-    gen_addr_reg_index(ctx, t0);
-    gen_check_align(ctx, t0, 0x03);
-    gen_qemu_ld32u(ctx, gpr, t0);
-    tcg_gen_mov_tl(cpu_reserve, t0);
-    tcg_gen_st_tl(gpr, cpu_env, offsetof(CPUPPCState, reserve_val));
-    tcg_temp_free(t0);
+#define LARX(name, len, loadop)                                      \
+static void gen_##name(DisasContext *ctx)                            \
+{                                                                    \
+    TCGv t0;                                                         \
+    TCGv gpr = cpu_gpr[rD(ctx->opcode)];                             \
+    gen_set_access_type(ctx, ACCESS_RES);                            \
+    t0 = tcg_temp_local_new();                                       \
+    gen_addr_reg_index(ctx, t0);                                     \
+    if ((len) > 1) {                                                 \
+        gen_check_align(ctx, t0, (len)-1);                           \
+    }                                                                \
+    gen_qemu_##loadop(ctx, gpr, t0);                                 \
+    tcg_gen_mov_tl(cpu_reserve, t0);                                 \
+    tcg_gen_st_tl(gpr, cpu_env, offsetof(CPUPPCState, reserve_val)); \
+    tcg_temp_free(t0);                                               \
 }
 
+/* lbarx, lharx, lwarx */
+LARX(lbarx, 1, ld8u);
+LARX(lharx, 2, ld16u);
+LARX(lwarx, 4, ld32u);
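+
+/*
+ * The guest-side pattern these load-and-reserve forms pair with
+ * (illustrative PPC assembly: an atomic word increment):
+ *
+ *     1:  lwarx   r4, 0, r3      # load word, set reservation on [r3]
+ *         addi    r4, r4, 1
+ *         stwcx.  r4, 0, r3      # store only if reservation still held
+ *         bne-    1b             # CR0.EQ clear: reservation lost, retry
+ */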
+
 #if defined(CONFIG_USER_ONLY)
-static void gen_conditional_store (DisasContext *ctx, TCGv EA,
-                                   int reg, int size)
+static void gen_conditional_store(DisasContext *ctx, TCGv EA,
+                                  int reg, int size)
 {
     TCGv t0 = tcg_temp_new();
     uint32_t save_exception = ctx->exception;
@@ -3171,74 +3312,115 @@ static void gen_conditional_store (DisasContext *ctx, TCGv EA,
     gen_exception(ctx, POWERPC_EXCP_STCX);
     ctx->exception = save_exception;
 }
-#endif
-
-/* stwcx. */
-static void gen_stwcx_(DisasContext *ctx)
-{
-    TCGv t0;
-    gen_set_access_type(ctx, ACCESS_RES);
-    t0 = tcg_temp_local_new();
-    gen_addr_reg_index(ctx, t0);
-    gen_check_align(ctx, t0, 0x03);
-#if defined(CONFIG_USER_ONLY)
-    gen_conditional_store(ctx, t0, rS(ctx->opcode), 4);
 #else
-    {
-        int l1;
+static void gen_conditional_store(DisasContext *ctx, TCGv EA,
+                                  int reg, int size)
+{
+    int l1;
 
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-        l1 = gen_new_label();
-        tcg_gen_brcond_tl(TCG_COND_NE, t0, cpu_reserve, l1);
-        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
-        gen_qemu_st32(ctx, cpu_gpr[rS(ctx->opcode)], t0);
-        gen_set_label(l1);
-        tcg_gen_movi_tl(cpu_reserve, -1);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+    l1 = gen_new_label();
+    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
+    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
+#if defined(TARGET_PPC64)
+    if (size == 8) {
+        gen_qemu_st64(ctx, cpu_gpr[reg], EA);
+    } else
+#endif
+    if (size == 4) {
+        gen_qemu_st32(ctx, cpu_gpr[reg], EA);
+    } else if (size == 2) {
+        gen_qemu_st16(ctx, cpu_gpr[reg], EA);
+#if defined(TARGET_PPC64)
+    } else if (size == 16) {
+        TCGv gpr1, gpr2, EA8;
+        if (unlikely(ctx->le_mode)) {
+            gpr1 = cpu_gpr[reg+1];
+            gpr2 = cpu_gpr[reg];
+        } else {
+            gpr1 = cpu_gpr[reg];
+            gpr2 = cpu_gpr[reg+1];
+        }
+        gen_qemu_st64(ctx, gpr1, EA);
+        EA8 = tcg_temp_local_new();
+        gen_addr_add(ctx, EA8, EA, 8);
+        gen_qemu_st64(ctx, gpr2, EA8);
+        tcg_temp_free(EA8);
+#endif
+    } else {
+        gen_qemu_st8(ctx, cpu_gpr[reg], EA);
     }
+    gen_set_label(l1);
+    tcg_gen_movi_tl(cpu_reserve, -1);
+}
 #endif
-    tcg_temp_free(t0);
+
+#define STCX(name, len)                                   \
+static void gen_##name(DisasContext *ctx)                 \
+{                                                         \
+    TCGv t0;                                              \
+    if (unlikely((len == 16) && (rD(ctx->opcode) & 1))) { \
+        gen_inval_exception(ctx,                          \
+                            POWERPC_EXCP_INVAL_INVAL);    \
+        return;                                           \
+    }                                                     \
+    gen_set_access_type(ctx, ACCESS_RES);                 \
+    t0 = tcg_temp_local_new();                            \
+    gen_addr_reg_index(ctx, t0);                          \
+    if (len > 1) {                                        \
+        gen_check_align(ctx, t0, (len)-1);                \
+    }                                                     \
+    gen_conditional_store(ctx, t0, rS(ctx->opcode), len); \
+    tcg_temp_free(t0);                                    \
 }
 
+STCX(stbcx_, 1);
+STCX(sthcx_, 2);
+STCX(stwcx_, 4);
+
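+/*
+ * Guest-visible outcome of every st*cx. above (sketch): CR0 is set to
+ * 0b00 || success || XER.SO, so software loops on CR0.EQ = 0; the
+ * reservation is cleared whether or not the store went through.
+ */
+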
 #if defined(TARGET_PPC64)
 /* ldarx */
-static void gen_ldarx(DisasContext *ctx)
-{
-    TCGv t0;
-    TCGv gpr = cpu_gpr[rD(ctx->opcode)];
-    gen_set_access_type(ctx, ACCESS_RES);
-    t0 = tcg_temp_local_new();
-    gen_addr_reg_index(ctx, t0);
-    gen_check_align(ctx, t0, 0x07);
-    gen_qemu_ld64(ctx, gpr, t0);
-    tcg_gen_mov_tl(cpu_reserve, t0);
-    tcg_gen_st_tl(gpr, cpu_env, offsetof(CPUPPCState, reserve_val));
-    tcg_temp_free(t0);
-}
+LARX(ldarx, 8, ld64);
 
-/* stdcx. */
-static void gen_stdcx_(DisasContext *ctx)
+/* lqarx */
+static void gen_lqarx(DisasContext *ctx)
 {
-    TCGv t0;
+    TCGv EA;
+    int rd = rD(ctx->opcode);
+    TCGv gpr1, gpr2;
+
+    if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||
+                 (rd == rB(ctx->opcode)))) {
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+        return;
+    }
+
     gen_set_access_type(ctx, ACCESS_RES);
-    t0 = tcg_temp_local_new();
-    gen_addr_reg_index(ctx, t0);
-    gen_check_align(ctx, t0, 0x07);
-#if defined(CONFIG_USER_ONLY)
-    gen_conditional_store(ctx, t0, rS(ctx->opcode), 8);
-#else
-    {
-        int l1;
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-        l1 = gen_new_label();
-        tcg_gen_brcond_tl(TCG_COND_NE, t0, cpu_reserve, l1);
-        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
-        gen_qemu_st64(ctx, cpu_gpr[rS(ctx->opcode)], t0);
-        gen_set_label(l1);
-        tcg_gen_movi_tl(cpu_reserve, -1);
+    EA = tcg_temp_local_new();
+    gen_addr_reg_index(ctx, EA);
+    gen_check_align(ctx, EA, 15);
+    if (unlikely(ctx->le_mode)) {
+        gpr1 = cpu_gpr[rd+1];
+        gpr2 = cpu_gpr[rd];
+    } else {
+        gpr1 = cpu_gpr[rd];
+        gpr2 = cpu_gpr[rd+1];
     }
-#endif
-    tcg_temp_free(t0);
+    gen_qemu_ld64(ctx, gpr1, EA);
+    tcg_gen_mov_tl(cpu_reserve, EA);
+
+    gen_addr_add(ctx, EA, EA, 8);
+    gen_qemu_ld64(ctx, gpr2, EA);
+
+    tcg_gen_st_tl(gpr1, cpu_env, offsetof(CPUPPCState, reserve_val));
+    tcg_gen_st_tl(gpr2, cpu_env, offsetof(CPUPPCState, reserve_val2));
+
+    tcg_temp_free(EA);
 }
+
+/* stdcx.  stqcx. */
+STCX(stdcx_, 8);
+STCX(stqcx_, 16);
 #endif /* defined(TARGET_PPC64) */
 
 /* sync */
@@ -3415,6 +3597,20 @@ static void gen_lfiwax(DisasContext *ctx)
     tcg_temp_free(t0);
 }
 
+/* lfiwzx */
+static void gen_lfiwzx(DisasContext *ctx)
+{
+    TCGv EA;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_set_access_type(ctx, ACCESS_FLOAT);
+    EA = tcg_temp_new();
+    gen_addr_reg_index(ctx, EA);
+    gen_qemu_ld32u_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA);
+    tcg_temp_free(EA);
+}
+
 /***                         Floating-point store                          ***/
 #define GEN_STF(name, stop, opc, type)                                        \
 static void glue(gen_, name)(DisasContext *ctx)                                       \
@@ -3638,6 +3834,7 @@ static void gen_b(DisasContext *ctx)
 #define BCOND_IM  0
 #define BCOND_LR  1
 #define BCOND_CTR 2
+#define BCOND_TAR 3
 
 static inline void gen_bcond(DisasContext *ctx, int type)
 {
@@ -3646,10 +3843,12 @@ static inline void gen_bcond(DisasContext *ctx, int type)
     TCGv target;
 
     ctx->exception = POWERPC_EXCP_BRANCH;
-    if (type == BCOND_LR || type == BCOND_CTR) {
+    if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) {
         target = tcg_temp_local_new();
         if (type == BCOND_CTR)
             tcg_gen_mov_tl(target, cpu_ctr);
+        else if (type == BCOND_TAR)
+            gen_load_spr(target, SPR_TAR);
         else
             tcg_gen_mov_tl(target, cpu_lr);
     } else {
@@ -3731,6 +3930,11 @@ static void gen_bclr(DisasContext *ctx)
     gen_bcond(ctx, BCOND_LR);
 }
 
+static void gen_bctar(DisasContext *ctx)
+{
+    gen_bcond(ctx, BCOND_TAR);
+}
+
 /***                      Condition register logical                       ***/
 #define GEN_CRLOGIC(name, tcg_op, opc)                                        \
 static void glue(gen_, name)(DisasContext *ctx)                                       \
@@ -6650,6 +6854,9 @@ GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17);
 GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18);
 GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19);
 GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20);
+GEN_VX_LOGICAL(veqv, tcg_gen_eqv_i64, 2, 26);
+GEN_VX_LOGICAL(vnand, tcg_gen_nand_i64, 2, 22);
+GEN_VX_LOGICAL(vorc, tcg_gen_orc_i64, 2, 21);
 
 #define GEN_VXFORM(name, opc2, opc3)                                    \
 static void glue(gen_, name)(DisasContext *ctx)                                 \
@@ -6685,24 +6892,69 @@ static void glue(gen_, name)(DisasContext *ctx)                         \
     tcg_temp_free_ptr(rd);                                              \
 }
 
+#define GEN_VXFORM3(name, opc2, opc3)                                   \
+static void glue(gen_, name)(DisasContext *ctx)                         \
+{                                                                       \
+    TCGv_ptr ra, rb, rc, rd;                                            \
+    if (unlikely(!ctx->altivec_enabled)) {                              \
+        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
+        return;                                                         \
+    }                                                                   \
+    ra = gen_avr_ptr(rA(ctx->opcode));                                  \
+    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
+    rc = gen_avr_ptr(rC(ctx->opcode));                                  \
+    rd = gen_avr_ptr(rD(ctx->opcode));                                  \
+    gen_helper_##name(rd, ra, rb, rc);                                  \
+    tcg_temp_free_ptr(ra);                                              \
+    tcg_temp_free_ptr(rb);                                              \
+    tcg_temp_free_ptr(rc);                                              \
+    tcg_temp_free_ptr(rd);                                              \
+}
+
+/*
+ * Support for Altivec instruction pairs that use bit 31 (Rc) as
+ * an opcode bit.  In general, these pairs come from different
+ * versions of the ISA, so we must also support a pair of flags for
+ * each instruction.
+ */
+#define GEN_VXFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)          \
+static void glue(gen_, name0##_##name1)(DisasContext *ctx)             \
+{                                                                      \
+    if ((Rc(ctx->opcode) == 0) &&                                      \
+        ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \
+        gen_##name0(ctx);                                              \
+    } else if ((Rc(ctx->opcode) == 1) &&                               \
+        ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \
+        gen_##name1(ctx);                                              \
+    } else {                                                           \
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);            \
+    }                                                                  \
+}
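+
+/*
+ * For instance, GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE,
+ *                               vmuluwm, PPC_NONE, PPC2_ALTIVEC_207)
+ * below expands to a gen_vmulouw_vmuluwm() dispatcher: Rc = 0 decodes
+ * as vmulouw on any Altivec CPU, Rc = 1 as vmuluwm on ISA 2.07 models,
+ * and anything else raises the invalid-instruction exception.
+ */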
+
 GEN_VXFORM(vaddubm, 0, 0);
 GEN_VXFORM(vadduhm, 0, 1);
 GEN_VXFORM(vadduwm, 0, 2);
+GEN_VXFORM(vaddudm, 0, 3);
 GEN_VXFORM(vsububm, 0, 16);
 GEN_VXFORM(vsubuhm, 0, 17);
 GEN_VXFORM(vsubuwm, 0, 18);
+GEN_VXFORM(vsubudm, 0, 19);
 GEN_VXFORM(vmaxub, 1, 0);
 GEN_VXFORM(vmaxuh, 1, 1);
 GEN_VXFORM(vmaxuw, 1, 2);
+GEN_VXFORM(vmaxud, 1, 3);
 GEN_VXFORM(vmaxsb, 1, 4);
 GEN_VXFORM(vmaxsh, 1, 5);
 GEN_VXFORM(vmaxsw, 1, 6);
+GEN_VXFORM(vmaxsd, 1, 7);
 GEN_VXFORM(vminub, 1, 8);
 GEN_VXFORM(vminuh, 1, 9);
 GEN_VXFORM(vminuw, 1, 10);
+GEN_VXFORM(vminud, 1, 11);
 GEN_VXFORM(vminsb, 1, 12);
 GEN_VXFORM(vminsh, 1, 13);
 GEN_VXFORM(vminsw, 1, 14);
+GEN_VXFORM(vminsd, 1, 15);
 GEN_VXFORM(vavgub, 1, 16);
 GEN_VXFORM(vavguh, 1, 17);
 GEN_VXFORM(vavguw, 1, 18);
@@ -6715,23 +6967,68 @@ GEN_VXFORM(vmrghw, 6, 2);
 GEN_VXFORM(vmrglb, 6, 4);
 GEN_VXFORM(vmrglh, 6, 5);
 GEN_VXFORM(vmrglw, 6, 6);
+
+static void gen_vmrgew(DisasContext *ctx)
+{
+    TCGv_i64 tmp;
+    int VT, VA, VB;
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
+    }
+    VT = rD(ctx->opcode);
+    VA = rA(ctx->opcode);
+    VB = rB(ctx->opcode);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_shri_i64(tmp, cpu_avrh[VB], 32);
+    tcg_gen_deposit_i64(cpu_avrh[VT], cpu_avrh[VA], tmp, 0, 32);
+    tcg_gen_shri_i64(tmp, cpu_avrl[VB], 32);
+    tcg_gen_deposit_i64(cpu_avrl[VT], cpu_avrl[VA], tmp, 0, 32);
+    tcg_temp_free_i64(tmp);
+}
+
+static void gen_vmrgow(DisasContext *ctx)
+{
+    int VT, VA, VB;
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
+    }
+    VT = rD(ctx->opcode);
+    VA = rA(ctx->opcode);
+    VB = rB(ctx->opcode);
+
+    tcg_gen_deposit_i64(cpu_avrh[VT], cpu_avrh[VB], cpu_avrh[VA], 32, 32);
+    tcg_gen_deposit_i64(cpu_avrl[VT], cpu_avrl[VB], cpu_avrl[VA], 32, 32);
+}
+
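+/*
+ * Word-numbering sketch for the two merges above (w[0] is the most
+ * significant word of the 128-bit AVR):
+ *
+ *     vmrgew: VT = vA.w[0] || vB.w[0] || vA.w[2] || vB.w[2]
+ *     vmrgow: VT = vA.w[1] || vB.w[1] || vA.w[3] || vB.w[3]
+ */
+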
 GEN_VXFORM(vmuloub, 4, 0);
 GEN_VXFORM(vmulouh, 4, 1);
+GEN_VXFORM(vmulouw, 4, 2);
+GEN_VXFORM(vmuluwm, 4, 2);
+GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE,
+                vmuluwm, PPC_NONE, PPC2_ALTIVEC_207)
 GEN_VXFORM(vmulosb, 4, 4);
 GEN_VXFORM(vmulosh, 4, 5);
+GEN_VXFORM(vmulosw, 4, 6);
 GEN_VXFORM(vmuleub, 4, 8);
 GEN_VXFORM(vmuleuh, 4, 9);
+GEN_VXFORM(vmuleuw, 4, 10);
 GEN_VXFORM(vmulesb, 4, 12);
 GEN_VXFORM(vmulesh, 4, 13);
+GEN_VXFORM(vmulesw, 4, 14);
 GEN_VXFORM(vslb, 2, 4);
 GEN_VXFORM(vslh, 2, 5);
 GEN_VXFORM(vslw, 2, 6);
+GEN_VXFORM(vsld, 2, 23);
 GEN_VXFORM(vsrb, 2, 8);
 GEN_VXFORM(vsrh, 2, 9);
 GEN_VXFORM(vsrw, 2, 10);
+GEN_VXFORM(vsrd, 2, 27);
 GEN_VXFORM(vsrab, 2, 12);
 GEN_VXFORM(vsrah, 2, 13);
 GEN_VXFORM(vsraw, 2, 14);
+GEN_VXFORM(vsrad, 2, 15);
 GEN_VXFORM(vslo, 6, 16);
 GEN_VXFORM(vsro, 6, 17);
 GEN_VXFORM(vaddcuw, 0, 6);
@@ -6748,19 +7045,36 @@ GEN_VXFORM_ENV(vsubuws, 0, 26);
 GEN_VXFORM_ENV(vsubsbs, 0, 28);
 GEN_VXFORM_ENV(vsubshs, 0, 29);
 GEN_VXFORM_ENV(vsubsws, 0, 30);
+GEN_VXFORM(vadduqm, 0, 4);
+GEN_VXFORM(vaddcuq, 0, 5);
+GEN_VXFORM3(vaddeuqm, 30, 0);
+GEN_VXFORM3(vaddecuq, 30, 0);
+GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
+            vaddecuq, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM(vsubuqm, 0, 20);
+GEN_VXFORM(vsubcuq, 0, 21);
+GEN_VXFORM3(vsubeuqm, 31, 0);
+GEN_VXFORM3(vsubecuq, 31, 0);
+GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
+            vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
 GEN_VXFORM(vrlb, 2, 0);
 GEN_VXFORM(vrlh, 2, 1);
 GEN_VXFORM(vrlw, 2, 2);
+GEN_VXFORM(vrld, 2, 3);
 GEN_VXFORM(vsl, 2, 7);
 GEN_VXFORM(vsr, 2, 11);
 GEN_VXFORM_ENV(vpkuhum, 7, 0);
 GEN_VXFORM_ENV(vpkuwum, 7, 1);
+GEN_VXFORM_ENV(vpkudum, 7, 17);
 GEN_VXFORM_ENV(vpkuhus, 7, 2);
 GEN_VXFORM_ENV(vpkuwus, 7, 3);
+GEN_VXFORM_ENV(vpkudus, 7, 19);
 GEN_VXFORM_ENV(vpkshus, 7, 4);
 GEN_VXFORM_ENV(vpkswus, 7, 5);
+GEN_VXFORM_ENV(vpksdus, 7, 21);
 GEN_VXFORM_ENV(vpkshss, 7, 6);
 GEN_VXFORM_ENV(vpkswss, 7, 7);
+GEN_VXFORM_ENV(vpksdss, 7, 23);
 GEN_VXFORM(vpkpx, 7, 12);
 GEN_VXFORM_ENV(vsum4ubs, 4, 24);
 GEN_VXFORM_ENV(vsum4sbs, 4, 28);
@@ -6793,20 +7107,58 @@ static void glue(gen_, name)(DisasContext *ctx)                         \
     GEN_VXRFORM1(name, name, #name, opc2, opc3)                      \
     GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4)))
 
+/*
+ * Support for Altivec instructions that use bit 31 (Rc) as an opcode
+ * bit but also use bit 21 as an actual Rc bit.  In general, these pairs
+ * come from different versions of the ISA, so we must also support a
+ * pair of flags for each instruction.
+ */
+#define GEN_VXRFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)     \
+static void glue(gen_, name0##_##name1)(DisasContext *ctx)             \
+{                                                                      \
+    if ((Rc(ctx->opcode) == 0) &&                                      \
+        ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \
+        if (Rc21(ctx->opcode) == 0) {                                  \
+            gen_##name0(ctx);                                          \
+        } else {                                                       \
+            gen_##name0##_(ctx);                                       \
+        }                                                              \
+    } else if ((Rc(ctx->opcode) == 1) &&                               \
+        ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \
+        if (Rc21(ctx->opcode) == 0) {                                  \
+            gen_##name1(ctx);                                          \
+        } else {                                                       \
+            gen_##name1##_(ctx);                                       \
+        }                                                              \
+    } else {                                                           \
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);            \
+    }                                                                  \
+}
+
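+/*
+ * Decode sketch for one such pair, vcmpeqfp/vcmpequd (paired below):
+ *
+ *     Rc = 0, Rc21 = 0  ->  vcmpeqfp       Rc = 0, Rc21 = 1  ->  vcmpeqfp.
+ *     Rc = 1, Rc21 = 0  ->  vcmpequd       Rc = 1, Rc21 = 1  ->  vcmpequd.
+ */
+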
 GEN_VXRFORM(vcmpequb, 3, 0)
 GEN_VXRFORM(vcmpequh, 3, 1)
 GEN_VXRFORM(vcmpequw, 3, 2)
+GEN_VXRFORM(vcmpequd, 3, 3)
 GEN_VXRFORM(vcmpgtsb, 3, 12)
 GEN_VXRFORM(vcmpgtsh, 3, 13)
 GEN_VXRFORM(vcmpgtsw, 3, 14)
+GEN_VXRFORM(vcmpgtsd, 3, 15)
 GEN_VXRFORM(vcmpgtub, 3, 8)
 GEN_VXRFORM(vcmpgtuh, 3, 9)
 GEN_VXRFORM(vcmpgtuw, 3, 10)
+GEN_VXRFORM(vcmpgtud, 3, 11)
 GEN_VXRFORM(vcmpeqfp, 3, 3)
 GEN_VXRFORM(vcmpgefp, 3, 7)
 GEN_VXRFORM(vcmpgtfp, 3, 11)
 GEN_VXRFORM(vcmpbfp, 3, 15)
 
+GEN_VXRFORM_DUAL(vcmpeqfp, PPC_ALTIVEC, PPC_NONE, \
+                 vcmpequd, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \
+                 vcmpgtsd, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \
+                 vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207)
+
 #define GEN_VXFORM_SIMM(name, opc2, opc3)                               \
 static void glue(gen_, name)(DisasContext *ctx)                         \
     {                                                                   \
@@ -6860,8 +7212,10 @@ static void glue(gen_, name)(DisasContext *ctx)                         \
 
 GEN_VXFORM_NOA(vupkhsb, 7, 8);
 GEN_VXFORM_NOA(vupkhsh, 7, 9);
+GEN_VXFORM_NOA(vupkhsw, 7, 25);
 GEN_VXFORM_NOA(vupklsb, 7, 10);
 GEN_VXFORM_NOA(vupklsh, 7, 11);
+GEN_VXFORM_NOA(vupklsw, 7, 27);
 GEN_VXFORM_NOA(vupkhpx, 7, 13);
 GEN_VXFORM_NOA(vupklpx, 7, 15);
 GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
@@ -7002,6 +7356,115 @@ GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20)
 GEN_VAFORM_PAIRED(vsel, vperm, 21)
 GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
 
+GEN_VXFORM_NOA(vclzb, 1, 28)
+GEN_VXFORM_NOA(vclzh, 1, 29)
+GEN_VXFORM_NOA(vclzw, 1, 30)
+GEN_VXFORM_NOA(vclzd, 1, 31)
+GEN_VXFORM_NOA(vpopcntb, 1, 28)
+GEN_VXFORM_NOA(vpopcnth, 1, 29)
+GEN_VXFORM_NOA(vpopcntw, 1, 30)
+GEN_VXFORM_NOA(vpopcntd, 1, 31)
+GEN_VXFORM_DUAL(vclzb, PPC_NONE, PPC2_ALTIVEC_207, \
+                vpopcntb, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vclzh, PPC_NONE, PPC2_ALTIVEC_207, \
+                vpopcnth, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vclzw, PPC_NONE, PPC2_ALTIVEC_207, \
+                vpopcntw, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
+                vpopcntd, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM(vbpermq, 6, 21);
+GEN_VXFORM_NOA(vgbbd, 6, 20);
+GEN_VXFORM(vpmsumb, 4, 16)
+GEN_VXFORM(vpmsumh, 4, 17)
+GEN_VXFORM(vpmsumw, 4, 18)
+GEN_VXFORM(vpmsumd, 4, 19)
+
+#define GEN_BCD(op)                                 \
+static void gen_##op(DisasContext *ctx)             \
+{                                                   \
+    TCGv_ptr ra, rb, rd;                            \
+    TCGv_i32 ps;                                    \
+                                                    \
+    if (unlikely(!ctx->altivec_enabled)) {          \
+        gen_exception(ctx, POWERPC_EXCP_VPU);       \
+        return;                                     \
+    }                                               \
+                                                    \
+    ra = gen_avr_ptr(rA(ctx->opcode));              \
+    rb = gen_avr_ptr(rB(ctx->opcode));              \
+    rd = gen_avr_ptr(rD(ctx->opcode));              \
+                                                    \
+    ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
+                                                    \
+    gen_helper_##op(cpu_crf[6], rd, ra, rb, ps);    \
+                                                    \
+    tcg_temp_free_ptr(ra);                          \
+    tcg_temp_free_ptr(rb);                          \
+    tcg_temp_free_ptr(rd);                          \
+    tcg_temp_free_i32(ps);                          \
+}
+
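+/*
+ * PS is the "preferred sign" bit of bcdadd./bcdsub. (mask 0x200 in the
+ * instruction word): the helper receives it as 0 or 1 and reports the
+ * result class back through CR6 (cpu_crf[6]).
+ */
+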
+GEN_BCD(bcdadd)
+GEN_BCD(bcdsub)
+
+GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \
+                bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \
+                bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \
+                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \
+                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
+
+static void gen_vsbox(DisasContext *ctx)
+{
+    TCGv_ptr ra, rd;
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
+    }
+    ra = gen_avr_ptr(rA(ctx->opcode));
+    rd = gen_avr_ptr(rD(ctx->opcode));
+    gen_helper_vsbox(rd, ra);
+    tcg_temp_free_ptr(ra);
+    tcg_temp_free_ptr(rd);
+}
+
+GEN_VXFORM(vcipher, 4, 20)
+GEN_VXFORM(vcipherlast, 4, 20)
+GEN_VXFORM(vncipher, 4, 21)
+GEN_VXFORM(vncipherlast, 4, 21)
+
+GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207,
+                vcipherlast, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207,
+                vncipherlast, PPC_NONE, PPC2_ALTIVEC_207)
+
+#define VSHASIGMA(op)                         \
+static void gen_##op(DisasContext *ctx)       \
+{                                             \
+    TCGv_ptr ra, rd;                          \
+    TCGv_i32 st_six;                          \
+    if (unlikely(!ctx->altivec_enabled)) {    \
+        gen_exception(ctx, POWERPC_EXCP_VPU); \
+        return;                               \
+    }                                         \
+    ra = gen_avr_ptr(rA(ctx->opcode));        \
+    rd = gen_avr_ptr(rD(ctx->opcode));        \
+    st_six = tcg_const_i32(rB(ctx->opcode));  \
+    gen_helper_##op(rd, ra, st_six);          \
+    tcg_temp_free_ptr(ra);                    \
+    tcg_temp_free_ptr(rd);                    \
+    tcg_temp_free_i32(st_six);                \
+}
+
+VSHASIGMA(vshasigmaw)
+VSHASIGMA(vshasigmad)
+
+GEN_VXFORM3(vpermxor, 22, 0xFF)
+GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE,
+                vpermxor, PPC_NONE, PPC2_ALTIVEC_207)
+
 /***                           VSX extension                               ***/
 
 static inline TCGv_i64 cpu_vsrh(int n)
@@ -7022,21 +7485,27 @@ static inline TCGv_i64 cpu_vsrl(int n)
     }
 }
 
-static void gen_lxsdx(DisasContext *ctx)
-{
-    TCGv EA;
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
-    gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    gen_qemu_ld64(ctx, cpu_vsrh(xT(ctx->opcode)), EA);
-    /* NOTE: cpu_vsrl is undefined */
-    tcg_temp_free(EA);
+#define VSX_LOAD_SCALAR(name, operation)                      \
+static void gen_##name(DisasContext *ctx)                     \
+{                                                             \
+    TCGv EA;                                                  \
+    if (unlikely(!ctx->vsx_enabled)) {                        \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                \
+        return;                                               \
+    }                                                         \
+    gen_set_access_type(ctx, ACCESS_INT);                     \
+    EA = tcg_temp_new();                                      \
+    gen_addr_reg_index(ctx, EA);                              \
+    gen_qemu_##operation(ctx, cpu_vsrh(xT(ctx->opcode)), EA); \
+    /* NOTE: cpu_vsrl is undefined */                         \
+    tcg_temp_free(EA);                                        \
 }
 
+VSX_LOAD_SCALAR(lxsdx, ld64)
+VSX_LOAD_SCALAR(lxsiwax, ld32s_i64)
+VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64)
+VSX_LOAD_SCALAR(lxsspx, ld32fs)
+
 static void gen_lxvd2x(DisasContext *ctx)
 {
     TCGv EA;
@@ -7098,20 +7567,25 @@ static void gen_lxvw4x(DisasContext *ctx)
     tcg_temp_free_i64(tmp);
 }
 
-static void gen_stxsdx(DisasContext *ctx)
-{
-    TCGv EA;
-    if (unlikely(!ctx->vsx_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VSXU);
-        return;
-    }
-    gen_set_access_type(ctx, ACCESS_INT);
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    gen_qemu_st64(ctx, cpu_vsrh(xS(ctx->opcode)), EA);
-    tcg_temp_free(EA);
+#define VSX_STORE_SCALAR(name, operation)                     \
+static void gen_##name(DisasContext *ctx)                     \
+{                                                             \
+    TCGv EA;                                                  \
+    if (unlikely(!ctx->vsx_enabled)) {                        \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                \
+        return;                                               \
+    }                                                         \
+    gen_set_access_type(ctx, ACCESS_INT);                     \
+    EA = tcg_temp_new();                                      \
+    gen_addr_reg_index(ctx, EA);                              \
+    gen_qemu_##operation(ctx, cpu_vsrh(xS(ctx->opcode)), EA); \
+    tcg_temp_free(EA);                                        \
 }
 
+VSX_STORE_SCALAR(stxsdx, st64)
+VSX_STORE_SCALAR(stxsiwx, st32_i64)
+VSX_STORE_SCALAR(stxsspx, st32fs)
+
 static void gen_stxvd2x(DisasContext *ctx)
 {
     TCGv EA;
@@ -7156,6 +7630,57 @@ static void gen_stxvw4x(DisasContext *ctx)
     tcg_temp_free_i64(tmp);
 }
 
+#define MV_VSRW(name, tcgop1, tcgop2, target, source)           \
+static void gen_##name(DisasContext *ctx)                       \
+{                                                               \
+    if (xS(ctx->opcode) < 32) {                                 \
+        if (unlikely(!ctx->fpu_enabled)) {                      \
+            gen_exception(ctx, POWERPC_EXCP_FPU);               \
+            return;                                             \
+        }                                                       \
+    } else {                                                    \
+        if (unlikely(!ctx->altivec_enabled)) {                  \
+            gen_exception(ctx, POWERPC_EXCP_VPU);               \
+            return;                                             \
+        }                                                       \
+    }                                                           \
+    TCGv_i64 tmp = tcg_temp_new_i64();                          \
+    tcg_gen_##tcgop1(tmp, source);                              \
+    tcg_gen_##tcgop2(target, tmp);                              \
+    tcg_temp_free_i64(tmp);                                     \
+}
+
+MV_VSRW(mfvsrwz, ext32u_i64, trunc_i64_tl, cpu_gpr[rA(ctx->opcode)], \
+        cpu_vsrh(xS(ctx->opcode)))
+MV_VSRW(mtvsrwa, extu_tl_i64, ext32s_i64, cpu_vsrh(xT(ctx->opcode)), \
+        cpu_gpr[rA(ctx->opcode)])
+MV_VSRW(mtvsrwz, extu_tl_i64, ext32u_i64, cpu_vsrh(xT(ctx->opcode)), \
+        cpu_gpr[rA(ctx->opcode)])
+
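+/*
+ * Note that the facility check above keys off the VSR number: VSRs 0-31
+ * overlay the FPRs (so FP must be enabled), while VSRs 32-63 overlay
+ * the AVRs (so Altivec must be enabled).
+ */
+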
+#if defined(TARGET_PPC64)
+#define MV_VSRD(name, target, source)                           \
+static void gen_##name(DisasContext *ctx)                       \
+{                                                               \
+    if (xS(ctx->opcode) < 32) {                                 \
+        if (unlikely(!ctx->fpu_enabled)) {                      \
+            gen_exception(ctx, POWERPC_EXCP_FPU);               \
+            return;                                             \
+        }                                                       \
+    } else {                                                    \
+        if (unlikely(!ctx->altivec_enabled)) {                  \
+            gen_exception(ctx, POWERPC_EXCP_VPU);               \
+            return;                                             \
+        }                                                       \
+    }                                                           \
+    tcg_gen_mov_i64(target, source);                            \
+}
+
+MV_VSRD(mfvsrd, cpu_gpr[rA(ctx->opcode)], cpu_vsrh(xS(ctx->opcode)))
+MV_VSRD(mtvsrd, cpu_vsrh(xT(ctx->opcode)), cpu_gpr[rA(ctx->opcode)])
+
+#endif
+
 static void gen_xxpermdi(DisasContext *ctx)
 {
     if (unlikely(!ctx->vsx_enabled)) {
@@ -7163,15 +7688,40 @@ static void gen_xxpermdi(DisasContext *ctx)
         return;
     }
 
-    if ((DM(ctx->opcode) & 2) == 0) {
-        tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode)));
-    } else {
-        tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode)));
-    }
-    if ((DM(ctx->opcode) & 1) == 0) {
-        tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xB(ctx->opcode)));
+    if (unlikely((xT(ctx->opcode) == xA(ctx->opcode)) ||
+                 (xT(ctx->opcode) == xB(ctx->opcode)))) {
+        TCGv_i64 xh, xl;
+
+        xh = tcg_temp_new_i64();
+        xl = tcg_temp_new_i64();
+
+        if ((DM(ctx->opcode) & 2) == 0) {
+            tcg_gen_mov_i64(xh, cpu_vsrh(xA(ctx->opcode)));
+        } else {
+            tcg_gen_mov_i64(xh, cpu_vsrl(xA(ctx->opcode)));
+        }
+        if ((DM(ctx->opcode) & 1) == 0) {
+            tcg_gen_mov_i64(xl, cpu_vsrh(xB(ctx->opcode)));
+        } else {
+            tcg_gen_mov_i64(xl, cpu_vsrl(xB(ctx->opcode)));
+        }
+
+        tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xh);
+        tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), xl);
+
+        tcg_temp_free_i64(xh);
+        tcg_temp_free_i64(xl);
     } else {
-        tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xB(ctx->opcode)));
+        if ((DM(ctx->opcode) & 2) == 0) {
+            tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode)));
+        } else {
+            tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode)));
+        }
+        if ((DM(ctx->opcode) & 1) == 0) {
+            tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xB(ctx->opcode)));
+        } else {
+            tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xB(ctx->opcode)));
+        }
     }
 }
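+
+/*
+ * DM quick reference (sketch), writing DM = 2a + b:
+ *
+ *     xT.hi = a ? xA.lo : xA.hi
+ *     xT.lo = b ? xB.lo : xB.hi
+ *
+ * so DM = 0 copies both high doublewords and DM = 3 both low ones.
+ */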
 
@@ -7179,8 +7729,8 @@ static void gen_xxpermdi(DisasContext *ctx)
 #define OP_NABS 2
 #define OP_NEG 3
 #define OP_CPSGN 4
-#define SGN_MASK_DP  0x8000000000000000ul
-#define SGN_MASK_SP 0x8000000080000000ul
+#define SGN_MASK_DP  0x8000000000000000ull
+#define SGN_MASK_SP 0x8000000080000000ull
 
 #define VSX_SCALAR_MOVE(name, op, sgn_mask)                       \
 static void glue(gen_, name)(DisasContext * ctx)                  \
@@ -7289,6 +7839,165 @@ VSX_VECTOR_MOVE(xvnabssp, OP_NABS, SGN_MASK_SP)
 VSX_VECTOR_MOVE(xvnegsp, OP_NEG, SGN_MASK_SP)
 VSX_VECTOR_MOVE(xvcpsgnsp, OP_CPSGN, SGN_MASK_SP)
 
+#define GEN_VSX_HELPER_2(name, op1, op2, inval, type)                         \
+static void gen_##name(DisasContext * ctx)                                    \
+{                                                                             \
+    TCGv_i32 opc;                                                             \
+    if (unlikely(!ctx->vsx_enabled)) {                                        \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                                \
+        return;                                                               \
+    }                                                                         \
+    /* NIP cannot be restored if the memory exception comes from a helper */  \
+    gen_update_nip(ctx, ctx->nip - 4);                                        \
+    opc = tcg_const_i32(ctx->opcode);                                         \
+    gen_helper_##name(cpu_env, opc);                                          \
+    tcg_temp_free_i32(opc);                                                   \
+}
+
+#define GEN_VSX_HELPER_XT_XB_ENV(name, op1, op2, inval, type) \
+static void gen_##name(DisasContext * ctx)                    \
+{                                                             \
+    if (unlikely(!ctx->vsx_enabled)) {                        \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                \
+        return;                                               \
+    }                                                         \
+    /* NIP cannot be restored if the exception comes */       \
+    /* from a helper. */                                      \
+    gen_update_nip(ctx, ctx->nip - 4);                        \
+                                                              \
+    gen_helper_##name(cpu_vsrh(xT(ctx->opcode)), cpu_env,     \
+                      cpu_vsrh(xB(ctx->opcode)));             \
+}
+
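+/*
+ * For example, GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX) below
+ * expands to a gen_xsadddp() that just hands the raw opcode word to
+ * helper_xsadddp(env, opc); all operand decoding happens in the helper.
+ */
+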
+GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xstdivdp, 0x14, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xstsqrtdp, 0x14, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmaddadp, 0x04, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmaddmdp, 0x04, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmsubadp, 0x04, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmsubmdp, 0x04, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmaddadp, 0x04, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmaddmdp, 0x04, 0x15, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmsubadp, 0x04, 0x16, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmsubmdp, 0x04, 0x17, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX)
+GEN_VSX_HELPER_XT_XB_ENV(xscvdpspn, 0x16, 0x10, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_XT_XB_ENV(xscvspdpn, 0x16, 0x14, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xscvdpsxds, 0x10, 0x15, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpsxws, 0x10, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpuxds, 0x10, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpuxws, 0x10, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvsxddp, 0x10, 0x17, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvuxddp, 0x10, 0x16, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpi, 0x12, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpic, 0x16, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpim, 0x12, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpip, 0x12, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpiz, 0x12, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_XT_XB_ENV(xsrsp, 0x12, 0x11, 0, PPC2_VSX207)
+
+GEN_VSX_HELPER_2(xsaddsp, 0x00, 0x00, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xssubsp, 0x00, 0x01, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmulsp, 0x00, 0x02, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsdivsp, 0x00, 0x03, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsresp, 0x14, 0x01, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmaddasp, 0x04, 0x00, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmaddmsp, 0x04, 0x01, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmsubasp, 0x04, 0x02, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmsubmsp, 0x04, 0x03, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmaddasp, 0x04, 0x10, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmaddmsp, 0x04, 0x11, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmsubasp, 0x04, 0x12, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmsubmsp, 0x04, 0x13, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xscvsxdsp, 0x10, 0x13, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xscvuxdsp, 0x10, 0x12, 0, PPC2_VSX207)
+
+GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddadp, 0x04, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddmdp, 0x04, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubadp, 0x04, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubmdp, 0x04, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddadp, 0x04, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddmdp, 0x04, 0x1D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubadp, 0x04, 0x1E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubmdp, 0x04, 0x1F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmindp, 0x00, 0x1D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpsp, 0x12, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpsxds, 0x10, 0x1D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpsxws, 0x10, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpuxds, 0x10, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpuxws, 0x10, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxddp, 0x10, 0x1F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxddp, 0x10, 0x1E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxwdp, 0x10, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxwdp, 0x10, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpi, 0x12, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpic, 0x16, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpim, 0x12, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpip, 0x12, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpiz, 0x12, 0x0D, 0, PPC2_VSX)
+
+GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddasp, 0x04, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddmsp, 0x04, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubasp, 0x04, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubmsp, 0x04, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddasp, 0x04, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddmsp, 0x04, 0x19, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubasp, 0x04, 0x1A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubmsp, 0x04, 0x1B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvminsp, 0x00, 0x19, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspdp, 0x12, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspsxds, 0x10, 0x19, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspsxws, 0x10, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspuxds, 0x10, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspuxws, 0x10, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxdsp, 0x10, 0x1B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxdsp, 0x10, 0x1A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxwsp, 0x10, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxwsp, 0x10, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspi, 0x12, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
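/*
 * Editorial note (not part of the patch): the helper names above follow
 * the ISA mnemonics -- "xs" = VSX scalar, "xv" = VSX vector, with a
 * "dp"/"sp" suffix for double/single precision.  So xvadddp adds the two
 * double-precision elements of each 128-bit VSR pairwise, while the
 * scalar single-precision forms (xsaddsp etc.) are new in ISA 2.07,
 * which is why they carry PPC2_VSX207 rather than PPC2_VSX.
 */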
 
 #define VSX_LOGICAL(name, tcg_op)                                    \
 static void glue(gen_, name)(DisasContext * ctx)                     \
@@ -7308,6 +8017,9 @@ VSX_LOGICAL(xxlandc, tcg_gen_andc_i64)
 VSX_LOGICAL(xxlor, tcg_gen_or_i64)
 VSX_LOGICAL(xxlxor, tcg_gen_xor_i64)
 VSX_LOGICAL(xxlnor, tcg_gen_nor_i64)
+VSX_LOGICAL(xxleqv, tcg_gen_eqv_i64)
+VSX_LOGICAL(xxlnand, tcg_gen_nand_i64)
+VSX_LOGICAL(xxlorc, tcg_gen_orc_i64)
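/*
 * Editorial note (not part of the patch): the three ISA 2.07 logicals map
 * directly onto the TCG ops named above; a minimal C model of the
 * per-doubleword semantics, for reference:
 *
 *   uint64_t xxleqv (uint64_t a, uint64_t b) { return ~(a ^ b); }
 *   uint64_t xxlnand(uint64_t a, uint64_t b) { return ~(a & b); }
 *   uint64_t xxlorc (uint64_t a, uint64_t b) { return a | ~b;   }
 */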
 
 #define VSX_XXMRG(name, high)                               \
 static void glue(gen_, name)(DisasContext * ctx)            \
@@ -9175,6 +9887,7 @@ GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD),
 GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B),
 GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205),
+GEN_HANDLER_E(bpermd, 0x1F, 0x1C, 0x07, 0x00000001, PPC_NONE, PPC2_PERM_ISA206),
 #endif
 GEN_HANDLER(rlwimi, 0x14, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(rlwinm, 0x15, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
@@ -9200,6 +9913,8 @@ GEN_HANDLER(fmr, 0x3F, 0x08, 0x02, 0x001F0000, PPC_FLOAT),
 GEN_HANDLER(fnabs, 0x3F, 0x08, 0x04, 0x001F0000, PPC_FLOAT),
 GEN_HANDLER(fneg, 0x3F, 0x08, 0x01, 0x001F0000, PPC_FLOAT),
 GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x00000000, PPC_NONE, PPC2_ISA205),
+GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x00000001, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x00000001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT),
 GEN_HANDLER(mffs, 0x3F, 0x07, 0x12, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT),
@@ -9219,11 +9934,17 @@ GEN_HANDLER(stswi, 0x1F, 0x15, 0x16, 0x00000001, PPC_STRING),
 GEN_HANDLER(stswx, 0x1F, 0x15, 0x14, 0x00000001, PPC_STRING),
 GEN_HANDLER(eieio, 0x1F, 0x16, 0x1A, 0x03FFF801, PPC_MEM_EIEIO),
 GEN_HANDLER(isync, 0x13, 0x16, 0x04, 0x03FFF801, PPC_MEM),
+GEN_HANDLER_E(lbarx, 0x1F, 0x14, 0x01, 0, PPC_NONE, PPC2_ATOMIC_ISA206),
+GEN_HANDLER_E(lharx, 0x1F, 0x14, 0x03, 0, PPC_NONE, PPC2_ATOMIC_ISA206),
 GEN_HANDLER(lwarx, 0x1F, 0x14, 0x00, 0x00000000, PPC_RES),
+GEN_HANDLER_E(stbcx_, 0x1F, 0x16, 0x15, 0, PPC_NONE, PPC2_ATOMIC_ISA206),
+GEN_HANDLER_E(sthcx_, 0x1F, 0x16, 0x16, 0, PPC_NONE, PPC2_ATOMIC_ISA206),
 GEN_HANDLER2(stwcx_, "stwcx.", 0x1F, 0x16, 0x04, 0x00000000, PPC_RES),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(ldarx, 0x1F, 0x14, 0x02, 0x00000000, PPC_64B),
+GEN_HANDLER_E(lqarx, 0x1F, 0x14, 0x08, 0, PPC_NONE, PPC2_LSQ_ISA207),
 GEN_HANDLER2(stdcx_, "stdcx.", 0x1F, 0x16, 0x06, 0x00000000, PPC_64B),
+GEN_HANDLER_E(stqcx_, 0x1F, 0x16, 0x05, 0, PPC_NONE, PPC2_LSQ_ISA207),
 #endif
 GEN_HANDLER(sync, 0x1F, 0x16, 0x12, 0x039FF801, PPC_MEM_SYNC),
 GEN_HANDLER(wait, 0x1F, 0x1E, 0x01, 0x03FFF801, PPC_WAIT),
@@ -9231,6 +9952,7 @@ GEN_HANDLER(b, 0x12, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
 GEN_HANDLER(bc, 0x10, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
 GEN_HANDLER(bcctr, 0x13, 0x10, 0x10, 0x00000000, PPC_FLOW),
 GEN_HANDLER(bclr, 0x13, 0x10, 0x00, 0x00000000, PPC_FLOW),
+GEN_HANDLER_E(bctar, 0x13, 0x10, 0x11, 0, PPC_NONE, PPC2_BCTAR_ISA207),
 GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER),
 GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW),
 #if defined(TARGET_PPC64)
@@ -9258,8 +9980,8 @@ GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000001, PPC_MISC),
 GEN_HANDLER(dcbf, 0x1F, 0x16, 0x02, 0x03C00001, PPC_CACHE),
 GEN_HANDLER(dcbi, 0x1F, 0x16, 0x0E, 0x03E00001, PPC_CACHE),
 GEN_HANDLER(dcbst, 0x1F, 0x16, 0x01, 0x03E00001, PPC_CACHE),
-GEN_HANDLER(dcbt, 0x1F, 0x16, 0x08, 0x02000001, PPC_CACHE),
-GEN_HANDLER(dcbtst, 0x1F, 0x16, 0x07, 0x02000001, PPC_CACHE),
+GEN_HANDLER(dcbt, 0x1F, 0x16, 0x08, 0x00000001, PPC_CACHE),
+GEN_HANDLER(dcbtst, 0x1F, 0x16, 0x07, 0x00000001, PPC_CACHE),
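/*
 * Editorial note (not part of the patch): loosening the invalid-bits mask
 * from 0x02000001 to 0x00000001 stops dcbt/dcbtst from faulting when the
 * full TH hint field (ISA 2.06) is non-zero; the hints themselves remain
 * no-ops under TCG.
 */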
 GEN_HANDLER(dcbz, 0x1F, 0x16, 0x1F, 0x03C00001, PPC_CACHE_DCBZ),
 GEN_HANDLER(dst, 0x1F, 0x16, 0x0A, 0x01800001, PPC_ALTIVEC),
 GEN_HANDLER(dstst, 0x1F, 0x16, 0x0B, 0x02000001, PPC_ALTIVEC),
@@ -9395,7 +10117,6 @@ GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
-GEN_HANDLER(vsldoi, 0x04, 0x16, 0xFF, 0x00000400, PPC_ALTIVEC),
 GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC),
 GEN_HANDLER2(evsel0, "evsel", 0x04, 0x1c, 0x09, 0x00000000, PPC_SPE),
 GEN_HANDLER2(evsel1, "evsel", 0x04, 0x1d, 0x09, 0x00000000, PPC_SPE),
@@ -9427,6 +10148,10 @@ GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0),
 GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1),
 GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0),
 GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1),
+GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(divweuo, 0x1F, 0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
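/*
 * Editorial note (not part of the patch): the "divide extended" family
 * computes (RA || 32 zero bits) / RB, i.e. a 64/32 divide yielding the
 * high-part quotient.  Worked example: divweu with RA = 1, RB = 3 gives
 * 0x100000000 / 3 = 0x55555555.
 */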
 
 #if defined(TARGET_PPC64)
 #undef GEN_INT_ARITH_DIVD
@@ -9437,6 +10162,11 @@ GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1),
 GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0),
 GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1),
 
+GEN_HANDLER_E(divdeu, 0x1F, 0x09, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(divdeuo, 0x1F, 0x09, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(divde, 0x1F, 0x09, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(divdeo, 0x1F, 0x09, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
+
 #undef GEN_INT_ARITH_MUL_HELPER
 #define GEN_INT_ARITH_MUL_HELPER(name, opc3)                                  \
 GEN_HANDLER(name, 0x1F, 0x09, opc3, 0x00000000, PPC_64B)
@@ -9544,13 +10274,22 @@ GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT),
 GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT),
 GEN_FLOAT_ACB(nmadd, 0x1F, 1, PPC_FLOAT),
 GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT),
+GEN_HANDLER_E(ftdiv, 0x3F, 0x00, 0x04, 1, PPC_NONE, PPC2_FP_TST_ISA206),
+GEN_HANDLER_E(ftsqrt, 0x3F, 0x00, 0x05, 1, PPC_NONE, PPC2_FP_TST_ISA206),
 GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT),
+GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT),
+GEN_HANDLER_E(fctiwuz, 0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT),
 #if defined(TARGET_PPC64)
 GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B),
+GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
+GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
+GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B),
+GEN_HANDLER_E(fctidu, 0x3F, 0x0E, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_64B),
+GEN_HANDLER_E(fctiduz, 0x3F, 0x0F, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 #endif
 GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT),
 GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT),
@@ -9642,6 +10381,7 @@ GEN_LDXF(name, ldop, 0x17, op | 0x00, type)
 GEN_LDFS(lfd, ld64, 0x12, PPC_FLOAT)
 GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT)
 GEN_HANDLER_E(lfiwax, 0x1f, 0x17, 0x1a, 0x00000001, PPC_NONE, PPC2_ISA205),
+GEN_HANDLER_E(lfiwzx, 0x1f, 0x17, 0x1b, 0x1, PPC_NONE, PPC2_FP_CVT_ISA206),
 GEN_HANDLER_E(lfdp, 0x39, 0xFF, 0xFF, 0x00200003, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(lfdpx, 0x1F, 0x17, 0x18, 0x00200001, PPC_NONE, PPC2_ISA205),
 
@@ -9754,33 +10494,61 @@ GEN_VR_STVE(wx, 0x07, 0x06),
 #undef GEN_VX_LOGICAL
 #define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3)                        \
 GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC)
+
+#undef GEN_VX_LOGICAL_207
+#define GEN_VX_LOGICAL_207(name, tcg_op, opc2, opc3) \
+GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207)
+
 GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16),
 GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17),
 GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18),
 GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19),
 GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20),
+GEN_VX_LOGICAL_207(veqv, tcg_gen_eqv_i64, 2, 26),
+GEN_VX_LOGICAL_207(vnand, tcg_gen_nand_i64, 2, 22),
+GEN_VX_LOGICAL_207(vorc, tcg_gen_orc_i64, 2, 21),
 
 #undef GEN_VXFORM
 #define GEN_VXFORM(name, opc2, opc3)                                    \
 GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC)
+
+#undef GEN_VXFORM_207
+#define GEN_VXFORM_207(name, opc2, opc3) \
+GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207)
+
+#undef GEN_VXFORM_DUAL
+#define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \
+GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1)
+
+#undef GEN_VXRFORM_DUAL
+#define GEN_VXRFORM_DUAL(name0, name1, opc2, opc3, tp0, tp1) \
+GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, tp0, tp1), \
+GEN_HANDLER_E(name0##_##name1, 0x4, opc2, (opc3 | 0x10), 0x00000000, tp0, tp1),
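/*
 * Editorial note (not part of the patch): a DUAL entry registers one
 * decode slot shared by two instructions; the generated
 * gen_<name0>_<name1>() handler (defined earlier in translate.c, not in
 * this hunk) selects between them.  Hedged sketch of the shape such a
 * handler takes, using the Rc bit plus the CPU's instruction flags:
 *
 *   static void gen_vsububm_bcdadd(DisasContext *ctx)
 *   {
 *       if ((Rc(ctx->opcode) == 0) && (ctx->insns_flags & PPC_ALTIVEC)) {
 *           gen_vsububm(ctx);          // Rc=0: legacy AltiVec subtract
 *       } else if ((Rc(ctx->opcode) == 1) &&
 *                  (ctx->insns_flags2 & PPC2_ALTIVEC_207)) {
 *           gen_bcdadd(ctx);           // Rc=1: ISA 2.07 BCD add assist
 *       } else {
 *           gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
 *       }
 *   }
 *
 * GEN_VXRFORM_DUAL additionally registers opc3 | 0x10 so that both Rc
 * rows of the slot reach the shared handler.
 */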
+
 GEN_VXFORM(vaddubm, 0, 0),
 GEN_VXFORM(vadduhm, 0, 1),
 GEN_VXFORM(vadduwm, 0, 2),
-GEN_VXFORM(vsububm, 0, 16),
-GEN_VXFORM(vsubuhm, 0, 17),
+GEN_VXFORM_207(vaddudm, 0, 3),
+GEN_VXFORM_DUAL(vsububm, bcdadd, 0, 16, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_DUAL(vsubuhm, bcdsub, 0, 17, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vsubuwm, 0, 18),
+GEN_VXFORM_207(vsubudm, 0, 19),
 GEN_VXFORM(vmaxub, 1, 0),
 GEN_VXFORM(vmaxuh, 1, 1),
 GEN_VXFORM(vmaxuw, 1, 2),
+GEN_VXFORM_207(vmaxud, 1, 3),
 GEN_VXFORM(vmaxsb, 1, 4),
 GEN_VXFORM(vmaxsh, 1, 5),
 GEN_VXFORM(vmaxsw, 1, 6),
+GEN_VXFORM_207(vmaxsd, 1, 7),
 GEN_VXFORM(vminub, 1, 8),
 GEN_VXFORM(vminuh, 1, 9),
 GEN_VXFORM(vminuw, 1, 10),
+GEN_VXFORM_207(vminud, 1, 11),
 GEN_VXFORM(vminsb, 1, 12),
 GEN_VXFORM(vminsh, 1, 13),
 GEN_VXFORM(vminsw, 1, 14),
+GEN_VXFORM_207(vminsd, 1, 15),
 GEN_VXFORM(vavgub, 1, 16),
 GEN_VXFORM(vavguh, 1, 17),
 GEN_VXFORM(vavguw, 1, 18),
@@ -9793,23 +10561,32 @@ GEN_VXFORM(vmrghw, 6, 2),
 GEN_VXFORM(vmrglb, 6, 4),
 GEN_VXFORM(vmrglh, 6, 5),
 GEN_VXFORM(vmrglw, 6, 6),
+GEN_VXFORM_207(vmrgew, 6, 30),
+GEN_VXFORM_207(vmrgow, 6, 26),
 GEN_VXFORM(vmuloub, 4, 0),
 GEN_VXFORM(vmulouh, 4, 1),
+GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vmulosb, 4, 4),
 GEN_VXFORM(vmulosh, 4, 5),
+GEN_VXFORM_207(vmulosw, 4, 6),
 GEN_VXFORM(vmuleub, 4, 8),
 GEN_VXFORM(vmuleuh, 4, 9),
+GEN_VXFORM_207(vmuleuw, 4, 10),
 GEN_VXFORM(vmulesb, 4, 12),
 GEN_VXFORM(vmulesh, 4, 13),
+GEN_VXFORM_207(vmulesw, 4, 14),
 GEN_VXFORM(vslb, 2, 4),
 GEN_VXFORM(vslh, 2, 5),
 GEN_VXFORM(vslw, 2, 6),
+GEN_VXFORM_207(vsld, 2, 23),
 GEN_VXFORM(vsrb, 2, 8),
 GEN_VXFORM(vsrh, 2, 9),
 GEN_VXFORM(vsrw, 2, 10),
+GEN_VXFORM_207(vsrd, 2, 27),
 GEN_VXFORM(vsrab, 2, 12),
 GEN_VXFORM(vsrah, 2, 13),
 GEN_VXFORM(vsraw, 2, 14),
+GEN_VXFORM_207(vsrad, 2, 15),
 GEN_VXFORM(vslo, 6, 16),
 GEN_VXFORM(vsro, 6, 17),
 GEN_VXFORM(vaddcuw, 0, 6),
@@ -9820,25 +10597,36 @@ GEN_VXFORM(vadduws, 0, 10),
 GEN_VXFORM(vaddsbs, 0, 12),
 GEN_VXFORM(vaddshs, 0, 13),
 GEN_VXFORM(vaddsws, 0, 14),
-GEN_VXFORM(vsububs, 0, 24),
-GEN_VXFORM(vsubuhs, 0, 25),
+GEN_VXFORM_DUAL(vsububs, bcdadd, 0, 24, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vsubuws, 0, 26),
 GEN_VXFORM(vsubsbs, 0, 28),
 GEN_VXFORM(vsubshs, 0, 29),
 GEN_VXFORM(vsubsws, 0, 30),
+GEN_VXFORM_207(vadduqm, 0, 4),
+GEN_VXFORM_207(vaddcuq, 0, 5),
+GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
+GEN_VXFORM_207(vsubuqm, 0, 20),
+GEN_VXFORM_207(vsubcuq, 0, 21),
+GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
 GEN_VXFORM(vrlb, 2, 0),
 GEN_VXFORM(vrlh, 2, 1),
 GEN_VXFORM(vrlw, 2, 2),
+GEN_VXFORM_207(vrld, 2, 3),
 GEN_VXFORM(vsl, 2, 7),
 GEN_VXFORM(vsr, 2, 11),
 GEN_VXFORM(vpkuhum, 7, 0),
 GEN_VXFORM(vpkuwum, 7, 1),
+GEN_VXFORM_207(vpkudum, 7, 17),
 GEN_VXFORM(vpkuhus, 7, 2),
 GEN_VXFORM(vpkuwus, 7, 3),
+GEN_VXFORM_207(vpkudus, 7, 19),
 GEN_VXFORM(vpkshus, 7, 4),
 GEN_VXFORM(vpkswus, 7, 5),
+GEN_VXFORM_207(vpksdus, 7, 21),
 GEN_VXFORM(vpkshss, 7, 6),
 GEN_VXFORM(vpkswss, 7, 7),
+GEN_VXFORM_207(vpksdss, 7, 23),
 GEN_VXFORM(vpkpx, 7, 12),
 GEN_VXFORM(vsum4ubs, 4, 24),
 GEN_VXFORM(vsum4sbs, 4, 28),
@@ -9866,10 +10654,10 @@ GEN_VXRFORM(vcmpgtsw, 3, 14)
 GEN_VXRFORM(vcmpgtub, 3, 8)
 GEN_VXRFORM(vcmpgtuh, 3, 9)
 GEN_VXRFORM(vcmpgtuw, 3, 10)
-GEN_VXRFORM(vcmpeqfp, 3, 3)
+GEN_VXRFORM_DUAL(vcmpeqfp, vcmpequd, 3, 3, PPC_ALTIVEC, PPC_NONE)
 GEN_VXRFORM(vcmpgefp, 3, 7)
-GEN_VXRFORM(vcmpgtfp, 3, 11)
-GEN_VXRFORM(vcmpbfp, 3, 15)
+GEN_VXRFORM_DUAL(vcmpgtfp, vcmpgtud, 3, 11, PPC_ALTIVEC, PPC_NONE)
+GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, PPC_ALTIVEC, PPC_NONE)
 
 #undef GEN_VXFORM_SIMM
 #define GEN_VXFORM_SIMM(name, opc2, opc3)                               \
@@ -9883,8 +10671,10 @@ GEN_VXFORM_SIMM(vspltisw, 6, 14),
     GEN_HANDLER(name, 0x04, opc2, opc3, 0x001f0000, PPC_ALTIVEC)
 GEN_VXFORM_NOA(vupkhsb, 7, 8),
 GEN_VXFORM_NOA(vupkhsh, 7, 9),
+GEN_VXFORM_207(vupkhsw, 7, 25),
 GEN_VXFORM_NOA(vupklsb, 7, 10),
 GEN_VXFORM_NOA(vupklsh, 7, 11),
+GEN_VXFORM_207(vupklsw, 7, 27),
 GEN_VXFORM_NOA(vupkhpx, 7, 13),
 GEN_VXFORM_NOA(vupklpx, 7, 15),
 GEN_VXFORM_NOA(vrefp, 5, 4),
@@ -9917,15 +10707,50 @@ GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20),
 GEN_VAFORM_PAIRED(vsel, vperm, 21),
 GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23),
 
+GEN_VXFORM_DUAL(vclzb, vpopcntb, 1, 28, PPC_NONE, PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL(vclzh, vpopcnth, 1, 29, PPC_NONE, PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL(vclzw, vpopcntw, 1, 30, PPC_NONE, PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207),
+
+GEN_VXFORM_207(vbpermq, 6, 21),
+GEN_VXFORM_207(vgbbd, 6, 20),
+GEN_VXFORM_207(vpmsumb, 4, 16),
+GEN_VXFORM_207(vpmsumh, 4, 17),
+GEN_VXFORM_207(vpmsumw, 4, 18),
+GEN_VXFORM_207(vpmsumd, 4, 19),
+
+GEN_VXFORM_207(vsbox, 4, 23),
+
+GEN_VXFORM_DUAL(vcipher, vcipherlast, 4, 20, PPC_NONE, PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL(vncipher, vncipherlast, 4, 21, PPC_NONE, PPC2_ALTIVEC_207),
+
+GEN_VXFORM_207(vshasigmaw, 1, 26),
+GEN_VXFORM_207(vshasigmad, 1, 27),
+
+GEN_VXFORM_DUAL(vsldoi, vpermxor, 22, 0xFF, PPC_ALTIVEC, PPC_NONE),
+
 GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX),
+GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX),
 
 GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
+GEN_HANDLER_E(stxsiwx, 0x1F, 0xC, 0x04, 0, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(stxsspx, 0x1F, 0xC, 0x14, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX),
 
+GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(mtvsrwz, 0x1F, 0x13, 0x07, 0x0000F800, PPC_NONE, PPC2_VSX207),
+#if defined(TARGET_PPC64)
+GEN_HANDLER_E(mfvsrd, 0x1F, 0x13, 0x01, 0x0000F800, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(mtvsrd, 0x1F, 0x13, 0x05, 0x0000F800, PPC_NONE, PPC2_VSX207),
+#endif
+
 #undef GEN_XX2FORM
 #define GEN_XX2FORM(name, opc2, opc3, fl2)                           \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
@@ -9938,6 +10763,17 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 0, PPC_NONE, fl2)
 
+#undef GEN_XX3_RC_FORM
+#define GEN_XX3_RC_FORM(name, opc2, opc3, fl2)                          \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x10, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x10, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x10, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x10, 0, PPC_NONE, fl2)
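/*
 * Editorial note (not part of the patch): the eight entries exist because
 * the XX3 form's AX/BX register-extension bits alias into the decoder's
 * opc2 column (hence opc2 | 0x00..0x03) and the record form (e.g.
 * xvcmpeqdp.) flips one more bit that lands in opc3 (hence opc3 | 0x10);
 * the macro simply enumerates all 4 x 2 combinations against one handler.
 */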
+
 #undef GEN_XX3FORM_DM
 #define GEN_XX3FORM_DM(name, opc2, opc3) \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\
@@ -9971,6 +10807,136 @@ GEN_XX2FORM(xvnabssp, 0x12, 0x1A, PPC2_VSX),
 GEN_XX2FORM(xvnegsp, 0x12, 0x1B, PPC2_VSX),
 GEN_XX3FORM(xvcpsgnsp, 0x00, 0x1A, PPC2_VSX),
 
+GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX),
+GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX),
+GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX),
+GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX),
+GEN_XX2FORM(xsredp,  0x14, 0x05, PPC2_VSX),
+GEN_XX2FORM(xssqrtdp,  0x16, 0x04, PPC2_VSX),
+GEN_XX2FORM(xsrsqrtedp,  0x14, 0x04, PPC2_VSX),
+GEN_XX3FORM(xstdivdp,  0x14, 0x07, PPC2_VSX),
+GEN_XX2FORM(xstsqrtdp,  0x14, 0x06, PPC2_VSX),
+GEN_XX3FORM(xsmaddadp, 0x04, 0x04, PPC2_VSX),
+GEN_XX3FORM(xsmaddmdp, 0x04, 0x05, PPC2_VSX),
+GEN_XX3FORM(xsmsubadp, 0x04, 0x06, PPC2_VSX),
+GEN_XX3FORM(xsmsubmdp, 0x04, 0x07, PPC2_VSX),
+GEN_XX3FORM(xsnmaddadp, 0x04, 0x14, PPC2_VSX),
+GEN_XX3FORM(xsnmaddmdp, 0x04, 0x15, PPC2_VSX),
+GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX),
+GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX),
+GEN_XX2FORM(xscmpodp,  0x0C, 0x05, PPC2_VSX),
+GEN_XX2FORM(xscmpudp,  0x0C, 0x04, PPC2_VSX),
+GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX),
+GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX),
+GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX),
+GEN_XX2FORM(xscvdpspn, 0x16, 0x10, PPC2_VSX207),
+GEN_XX2FORM(xscvspdp, 0x12, 0x14, PPC2_VSX),
+GEN_XX2FORM(xscvspdpn, 0x16, 0x14, PPC2_VSX207),
+GEN_XX2FORM(xscvdpsxds, 0x10, 0x15, PPC2_VSX),
+GEN_XX2FORM(xscvdpsxws, 0x10, 0x05, PPC2_VSX),
+GEN_XX2FORM(xscvdpuxds, 0x10, 0x14, PPC2_VSX),
+GEN_XX2FORM(xscvdpuxws, 0x10, 0x04, PPC2_VSX),
+GEN_XX2FORM(xscvsxddp, 0x10, 0x17, PPC2_VSX),
+GEN_XX2FORM(xscvuxddp, 0x10, 0x16, PPC2_VSX),
+GEN_XX2FORM(xsrdpi, 0x12, 0x04, PPC2_VSX),
+GEN_XX2FORM(xsrdpic, 0x16, 0x06, PPC2_VSX),
+GEN_XX2FORM(xsrdpim, 0x12, 0x07, PPC2_VSX),
+GEN_XX2FORM(xsrdpip, 0x12, 0x06, PPC2_VSX),
+GEN_XX2FORM(xsrdpiz, 0x12, 0x05, PPC2_VSX),
+
+GEN_XX3FORM(xsaddsp, 0x00, 0x00, PPC2_VSX207),
+GEN_XX3FORM(xssubsp, 0x00, 0x01, PPC2_VSX207),
+GEN_XX3FORM(xsmulsp, 0x00, 0x02, PPC2_VSX207),
+GEN_XX3FORM(xsdivsp, 0x00, 0x03, PPC2_VSX207),
+GEN_XX2FORM(xsresp,  0x14, 0x01, PPC2_VSX207),
+GEN_XX2FORM(xsrsp, 0x12, 0x11, PPC2_VSX207),
+GEN_XX2FORM(xssqrtsp,  0x16, 0x00, PPC2_VSX207),
+GEN_XX2FORM(xsrsqrtesp,  0x14, 0x00, PPC2_VSX207),
+GEN_XX3FORM(xsmaddasp, 0x04, 0x00, PPC2_VSX207),
+GEN_XX3FORM(xsmaddmsp, 0x04, 0x01, PPC2_VSX207),
+GEN_XX3FORM(xsmsubasp, 0x04, 0x02, PPC2_VSX207),
+GEN_XX3FORM(xsmsubmsp, 0x04, 0x03, PPC2_VSX207),
+GEN_XX3FORM(xsnmaddasp, 0x04, 0x10, PPC2_VSX207),
+GEN_XX3FORM(xsnmaddmsp, 0x04, 0x11, PPC2_VSX207),
+GEN_XX3FORM(xsnmsubasp, 0x04, 0x12, PPC2_VSX207),
+GEN_XX3FORM(xsnmsubmsp, 0x04, 0x13, PPC2_VSX207),
+GEN_XX2FORM(xscvsxdsp, 0x10, 0x13, PPC2_VSX207),
+GEN_XX2FORM(xscvuxdsp, 0x10, 0x12, PPC2_VSX207),
+
+GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
+GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
+GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX),
+GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvredp,  0x14, 0x0D, PPC2_VSX),
+GEN_XX2FORM(xvsqrtdp,  0x16, 0x0C, PPC2_VSX),
+GEN_XX2FORM(xvrsqrtedp,  0x14, 0x0C, PPC2_VSX),
+GEN_XX3FORM(xvtdivdp, 0x14, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvtsqrtdp, 0x14, 0x0E, PPC2_VSX),
+GEN_XX3FORM(xvmaddadp, 0x04, 0x0C, PPC2_VSX),
+GEN_XX3FORM(xvmaddmdp, 0x04, 0x0D, PPC2_VSX),
+GEN_XX3FORM(xvmsubadp, 0x04, 0x0E, PPC2_VSX),
+GEN_XX3FORM(xvmsubmdp, 0x04, 0x0F, PPC2_VSX),
+GEN_XX3FORM(xvnmaddadp, 0x04, 0x1C, PPC2_VSX),
+GEN_XX3FORM(xvnmaddmdp, 0x04, 0x1D, PPC2_VSX),
+GEN_XX3FORM(xvnmsubadp, 0x04, 0x1E, PPC2_VSX),
+GEN_XX3FORM(xvnmsubmdp, 0x04, 0x1F, PPC2_VSX),
+GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX),
+GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvcvdpsp, 0x12, 0x18, PPC2_VSX),
+GEN_XX2FORM(xvcvdpsxds, 0x10, 0x1D, PPC2_VSX),
+GEN_XX2FORM(xvcvdpsxws, 0x10, 0x0D, PPC2_VSX),
+GEN_XX2FORM(xvcvdpuxds, 0x10, 0x1C, PPC2_VSX),
+GEN_XX2FORM(xvcvdpuxws, 0x10, 0x0C, PPC2_VSX),
+GEN_XX2FORM(xvcvsxddp, 0x10, 0x1F, PPC2_VSX),
+GEN_XX2FORM(xvcvuxddp, 0x10, 0x1E, PPC2_VSX),
+GEN_XX2FORM(xvcvsxwdp, 0x10, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvcvuxwdp, 0x10, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpi, 0x12, 0x0C, PPC2_VSX),
+GEN_XX2FORM(xvrdpic, 0x16, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpim, 0x12, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvrdpip, 0x12, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpiz, 0x12, 0x0D, PPC2_VSX),
+
+GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
+GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
+GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX),
+GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
+GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
+GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX),
+GEN_XX3FORM(xvtdivsp, 0x14, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvtsqrtsp, 0x14, 0x0A, PPC2_VSX),
+GEN_XX3FORM(xvmaddasp, 0x04, 0x08, PPC2_VSX),
+GEN_XX3FORM(xvmaddmsp, 0x04, 0x09, PPC2_VSX),
+GEN_XX3FORM(xvmsubasp, 0x04, 0x0A, PPC2_VSX),
+GEN_XX3FORM(xvmsubmsp, 0x04, 0x0B, PPC2_VSX),
+GEN_XX3FORM(xvnmaddasp, 0x04, 0x18, PPC2_VSX),
+GEN_XX3FORM(xvnmaddmsp, 0x04, 0x19, PPC2_VSX),
+GEN_XX3FORM(xvnmsubasp, 0x04, 0x1A, PPC2_VSX),
+GEN_XX3FORM(xvnmsubmsp, 0x04, 0x1B, PPC2_VSX),
+GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX),
+GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvcvspdp, 0x12, 0x1C, PPC2_VSX),
+GEN_XX2FORM(xvcvspsxds, 0x10, 0x19, PPC2_VSX),
+GEN_XX2FORM(xvcvspsxws, 0x10, 0x09, PPC2_VSX),
+GEN_XX2FORM(xvcvspuxds, 0x10, 0x18, PPC2_VSX),
+GEN_XX2FORM(xvcvspuxws, 0x10, 0x08, PPC2_VSX),
+GEN_XX2FORM(xvcvsxdsp, 0x10, 0x1B, PPC2_VSX),
+GEN_XX2FORM(xvcvuxdsp, 0x10, 0x1A, PPC2_VSX),
+GEN_XX2FORM(xvcvsxwsp, 0x10, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvcvuxwsp, 0x10, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspi, 0x12, 0x08, PPC2_VSX),
+GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX),
+
 #undef VSX_LOGICAL
 #define VSX_LOGICAL(name, opc2, opc3, fl2) \
 GEN_XX3FORM(name, opc2, opc3, fl2)
@@ -9980,6 +10946,9 @@ VSX_LOGICAL(xxlandc, 0x8, 0x11, PPC2_VSX),
 VSX_LOGICAL(xxlor, 0x8, 0x12, PPC2_VSX),
 VSX_LOGICAL(xxlxor, 0x8, 0x13, PPC2_VSX),
 VSX_LOGICAL(xxlnor, 0x8, 0x14, PPC2_VSX),
+VSX_LOGICAL(xxleqv, 0x8, 0x17, PPC2_VSX207),
+VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207),
+VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207),
 GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX),
 GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX),
 GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
@@ -10260,8 +11229,13 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     case POWERPC_MMU_SOFT_74xx:
 #if defined(TARGET_PPC64)
     case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_06:
+    case POWERPC_MMU_2_06a:
+    case POWERPC_MMU_2_06d:
 #endif
-        cpu_fprintf(f, " SDR1 " TARGET_FMT_lx "\n", env->spr[SPR_SDR1]);
+        cpu_fprintf(f, " SDR1 " TARGET_FMT_lx "   DAR " TARGET_FMT_lx
+                       "  DSISR " TARGET_FMT_lx "\n", env->spr[SPR_SDR1],
+                    env->spr[SPR_DAR], env->spr[SPR_DSISR]);
         break;
     case POWERPC_MMU_BOOKE206:
         cpu_fprintf(f, " MAS0 " TARGET_FMT_lx "  MAS1 " TARGET_FMT_lx
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 445c3606fe..3eafbb0d1a 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -628,6 +628,9 @@ static inline void _spr_register(CPUPPCState *env, int num,
     spr->oea_read = oea_read;
     spr->oea_write = oea_write;
 #endif
+#if defined(CONFIG_KVM)
+    spr->one_reg_id = one_reg_id;
+#endif
     env->spr[num] = initial_value;
 }
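/*
 * Editorial note (not part of the patch): one_reg_id records the
 * KVM_REG_PPC_* identifier for the SPR so KVM register sync can be
 * table-driven instead of hand-listing each register.  Illustrative
 * sketch of the intended consumer (hypothetical loop; the real sync
 * code lives in target-ppc/kvm.c):
 *
 *   for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) {
 *       if (env->spr_cb[i].one_reg_id) {
 *           kvm_get_one_spr(cs, env->spr_cb[i].one_reg_id, i);
 *       }
 *   }
 */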
 
@@ -1064,7 +1067,7 @@ static void gen_spr_amr (CPUPPCState *env)
     spr_register_kvm(env, SPR_AMR, "AMR",
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_generic,
-                     KVM_REG_PPC_AMR, 0xffffffffffffffffULL);
+                     KVM_REG_PPC_AMR, 0);
     spr_register_kvm(env, SPR_UAMOR, "UAMOR",
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_generic,
@@ -2578,7 +2581,6 @@ static void gen_spr_8xx (CPUPPCState *env)
  * HRMOR   => SPR 313 (Power 2.04 hypv)
  * HSRR0   => SPR 314 (Power 2.04 hypv)
  * HSRR1   => SPR 315 (Power 2.04 hypv)
- * LPCR    => SPR 316 (970)
  * LPIDR   => SPR 317 (970)
  * EPR     => SPR 702 (Power 2.04 emb)
  * perf    => 768-783 (Power 2.04)
@@ -4720,7 +4722,7 @@ POWERPC_FAMILY(e5500)(ObjectClass *oc, void *data)
                        PPC_FLOAT_STFIWX | PPC_WAIT |
                        PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC |
                        PPC_64B | PPC_POPCNTB | PPC_POPCNTWD;
-    pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL;
+    pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206;
     pcc->msr_mask = 0x000000009402FB36ULL;
     pcc->mmu_model = POWERPC_MMU_BOOKE206;
     pcc->excp_model = POWERPC_EXCP_BOOKE;
@@ -6644,33 +6646,13 @@ static void init_proc_970 (CPUPPCState *env)
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* XXX : not implemented */
-    spr_register(env, SPR_750FX_HID2, "HID2",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000);
-    /* XXX : not implemented */
     spr_register(env, SPR_970_HID5, "HID5",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, &spr_write_generic,
                  POWERPC970_HID5_INIT);
-    /* XXX : not implemented */
-    spr_register(env, SPR_L2CR, "L2CR",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, spr_access_nop,
-                 0x00000000);
     /* Memory management */
     /* XXX: not correct */
     gen_low_BATs(env);
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCFG, "MMUCFG",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, SPR_NOACCESS,
-                 0x00000000); /* TOFIX */
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCSR0, "MMUCSR0",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000); /* TOFIX */
     spr_register(env, SPR_HIOR, "SPR_HIOR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_hior, &spr_write_hior,
@@ -6744,44 +6726,24 @@ static void init_proc_970FX (CPUPPCState *env)
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* XXX : not implemented */
-    spr_register(env, SPR_750FX_HID2, "HID2",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000);
-    /* XXX : not implemented */
     spr_register(env, SPR_970_HID5, "HID5",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, &spr_write_generic,
                  POWERPC970_HID5_INIT);
-    /* XXX : not implemented */
-    spr_register(env, SPR_L2CR, "L2CR",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, spr_access_nop,
-                 0x00000000);
     /* Memory management */
     /* XXX: not correct */
     gen_low_BATs(env);
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCFG, "MMUCFG",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, SPR_NOACCESS,
-                 0x00000000); /* TOFIX */
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCSR0, "MMUCSR0",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000); /* TOFIX */
     spr_register(env, SPR_HIOR, "SPR_HIOR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_hior, &spr_write_hior,
                  0x00000000);
     spr_register(env, SPR_CTRL, "SPR_CTRL",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
+                 SPR_NOACCESS, &spr_write_generic,
                  0x00000000);
     spr_register(env, SPR_UCTRL, "SPR_UCTRL",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
+                 &spr_read_generic, SPR_NOACCESS,
                  0x00000000);
     spr_register(env, SPR_VRSAVE, "SPR_VRSAVE",
                  &spr_read_generic, &spr_write_generic,
@@ -6830,106 +6792,6 @@ POWERPC_FAMILY(970FX)(ObjectClass *oc, void *data)
                  POWERPC_FLAG_BUS_CLK;
 }
 
-static int check_pow_970GX (CPUPPCState *env)
-{
-    if (env->spr[SPR_HID0] & 0x00600000)
-        return 1;
-
-    return 0;
-}
-
-static void init_proc_970GX (CPUPPCState *env)
-{
-    gen_spr_ne_601(env);
-    gen_spr_7xx(env);
-    /* Time base */
-    gen_tbl(env);
-    /* Hardware implementation registers */
-    /* XXX : not implemented */
-    spr_register(env, SPR_HID0, "HID0",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_clear,
-                 0x60000000);
-    /* XXX : not implemented */
-    spr_register(env, SPR_HID1, "HID1",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000);
-    /* XXX : not implemented */
-    spr_register(env, SPR_750FX_HID2, "HID2",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000);
-    /* XXX : not implemented */
-    spr_register(env, SPR_970_HID5, "HID5",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 POWERPC970_HID5_INIT);
-    /* XXX : not implemented */
-    spr_register(env, SPR_L2CR, "L2CR",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, spr_access_nop,
-                 0x00000000);
-    /* Memory management */
-    /* XXX: not correct */
-    gen_low_BATs(env);
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCFG, "MMUCFG",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, SPR_NOACCESS,
-                 0x00000000); /* TOFIX */
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCSR0, "MMUCSR0",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000); /* TOFIX */
-    spr_register(env, SPR_HIOR, "SPR_HIOR",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_hior, &spr_write_hior,
-                 0x00000000);
-#if !defined(CONFIG_USER_ONLY)
-    env->slb_nr = 32;
-#endif
-    init_excp_970(env);
-    env->dcache_line_size = 128;
-    env->icache_line_size = 128;
-    /* Allocate hardware IRQ controller */
-    ppc970_irq_init(env);
-    /* Can't find information on what this should be on reset.  This
-     * value is the one used by 74xx processors. */
-    vscr_init(env, 0x00010000);
-}
-
-POWERPC_FAMILY(970GX)(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
-
-    dc->desc = "PowerPC 970 GX";
-    pcc->init_proc = init_proc_970GX;
-    pcc->check_pow = check_pow_970GX;
-    pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB |
-                       PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
-                       PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
-                       PPC_FLOAT_STFIWX |
-                       PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
-                       PPC_MEM_SYNC | PPC_MEM_EIEIO |
-                       PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
-                       PPC_64B | PPC_ALTIVEC |
-                       PPC_SEGMENT_64B | PPC_SLBI;
-    pcc->msr_mask = 0x800000000204FF36ULL;
-    pcc->mmu_model = POWERPC_MMU_64B;
-#if defined(CONFIG_SOFTMMU)
-    pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
-#endif
-    pcc->excp_model = POWERPC_EXCP_970;
-    pcc->bus_model = PPC_FLAGS_INPUT_970;
-    pcc->bfd_mach = bfd_mach_ppc64;
-    pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
-                 POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
-                 POWERPC_FLAG_BUS_CLK;
-}
-
 static int check_pow_970MP (CPUPPCState *env)
 {
     if (env->spr[SPR_HID0] & 0x01C00000)
@@ -6956,37 +6818,23 @@ static void init_proc_970MP (CPUPPCState *env)
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* XXX : not implemented */
-    spr_register(env, SPR_750FX_HID2, "HID2",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000);
-    /* XXX : not implemented */
     spr_register(env, SPR_970_HID5, "HID5",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, &spr_write_generic,
                  POWERPC970_HID5_INIT);
     /* XXX : not implemented */
-    spr_register(env, SPR_L2CR, "L2CR",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, spr_access_nop,
-                 0x00000000);
     /* Memory management */
     /* XXX: not correct */
     gen_low_BATs(env);
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCFG, "MMUCFG",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, SPR_NOACCESS,
-                 0x00000000); /* TOFIX */
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCSR0, "MMUCSR0",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000); /* TOFIX */
     spr_register(env, SPR_HIOR, "SPR_HIOR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_hior, &spr_write_hior,
                  0x00000000);
+    /* Logical partitioning */
+    spr_register_kvm(env, SPR_LPCR, "LPCR",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_LPCR, 0x00000000);
 #if !defined(CONFIG_USER_ONLY)
     env->slb_nr = 32;
 #endif
@@ -7048,49 +6896,34 @@ static void init_proc_power5plus(CPUPPCState *env)
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* XXX : not implemented */
-    spr_register(env, SPR_750FX_HID2, "HID2",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000);
-    /* XXX : not implemented */
     spr_register(env, SPR_970_HID5, "HID5",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, &spr_write_generic,
                  POWERPC970_HID5_INIT);
-    /* XXX : not implemented */
-    spr_register(env, SPR_L2CR, "L2CR",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, spr_access_nop,
-                 0x00000000);
     /* Memory management */
     /* XXX: not correct */
     gen_low_BATs(env);
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCFG, "MMUCFG",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, SPR_NOACCESS,
-                 0x00000000); /* TOFIX */
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCSR0, "MMUCSR0",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000); /* TOFIX */
     spr_register(env, SPR_HIOR, "SPR_HIOR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_hior, &spr_write_hior,
                  0x00000000);
     spr_register(env, SPR_CTRL, "SPR_CTRL",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
+                 SPR_NOACCESS, &spr_write_generic,
                  0x00000000);
     spr_register(env, SPR_UCTRL, "SPR_UCTRL",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
+                 &spr_read_generic, SPR_NOACCESS,
                  0x00000000);
     spr_register(env, SPR_VRSAVE, "SPR_VRSAVE",
                  &spr_read_generic, &spr_write_generic,
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
+    /* Logical partitioning */
+    spr_register_kvm(env, SPR_LPCR, "LPCR",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_LPCR, 0x00000000);
 #if !defined(CONFIG_USER_ONLY)
     env->slb_nr = 64;
 #endif
@@ -7177,21 +7010,15 @@ static void init_proc_POWER7 (CPUPPCState *env)
                      &spr_read_generic, &spr_write_generic,
                      KVM_REG_PPC_PMC6, 0x00000000);
 #endif /* !CONFIG_USER_ONLY */
-    /* Memory management */
-    /* XXX : not implemented */
-    spr_register(env, SPR_MMUCFG, "MMUCFG",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, SPR_NOACCESS,
-                 0x00000000); /* TOFIX */
     gen_spr_amr(env);
     /* XXX : not implemented */
     spr_register(env, SPR_CTRL, "SPR_CTRLT",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
+                 SPR_NOACCESS, &spr_write_generic,
                  0x80800000);
     spr_register(env, SPR_UCTRL, "SPR_CTRLF",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
+                 &spr_read_generic, SPR_NOACCESS,
                  0x80800000);
     spr_register(env, SPR_VRSAVE, "SPR_VRSAVE",
                  &spr_read_generic, &spr_write_generic,
@@ -7201,6 +7028,11 @@ static void init_proc_POWER7 (CPUPPCState *env)
                  &spr_read_generic, &spr_write_generic,
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
+    /* Logical partitioning */
+    spr_register_kvm(env, SPR_LPCR, "LPCR",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_LPCR, 0x00000000);
 #if !defined(CONFIG_USER_ONLY)
     env->slb_nr = 32;
 #endif
@@ -7229,14 +7061,19 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
     pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
                        PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
                        PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
+                       PPC_FLOAT_FRSQRTES |
                        PPC_FLOAT_STFIWX |
+                       PPC_FLOAT_EXT |
                        PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
                        PPC_MEM_SYNC | PPC_MEM_EIEIO |
                        PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
                        PPC_64B | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
                        PPC_POPCNTB | PPC_POPCNTWD;
-    pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205;
+    pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
+                        PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
+                        PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
+                        PPC2_FP_TST_ISA206;
     pcc->msr_mask = 0x800000000284FF37ULL;
     pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
@@ -7267,14 +7104,19 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
     pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
                        PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
                        PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
+                       PPC_FLOAT_FRSQRTES |
                        PPC_FLOAT_STFIWX |
+                       PPC_FLOAT_EXT |
                        PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
                        PPC_MEM_SYNC | PPC_MEM_EIEIO |
                        PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
                        PPC_64B | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
                        PPC_POPCNTB | PPC_POPCNTWD;
-    pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205;
+    pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
+                        PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
+                        PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
+                        PPC2_FP_TST_ISA206;
     pcc->msr_mask = 0x800000000204FF37ULL;
     pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
@@ -7291,6 +7133,18 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
     pcc->l1_icache_size = 0x8000;
 }
 
+static void init_proc_POWER8(CPUPPCState *env)
+{
+    /* inherit P7 */
+    init_proc_POWER7(env);
+
+    /* P8 supports the TAR */
+    spr_register(env, SPR_TAR, "TAR",
+                 &spr_read_generic, &spr_write_generic,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+}
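/*
 * Editorial note (not part of the patch): TAR is the ISA 2.07 Target
 * Address Register (SPR 815) consumed by the bctar handler registered
 * earlier; illustrative guest usage (assembly sketch, operands following
 * the BO/BI/BH branch convention):
 *
 *   mtspr 815, r3     # TAR <- r3
 *   bctar 20, 0, 0    # BO=20: branch unconditionally via TAR
 */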
+
 POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -7300,19 +7154,25 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     dc->desc = "POWER8";
     pcc->pvr = CPU_POWERPC_POWER8_BASE;
     pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
-    pcc->init_proc = init_proc_POWER7;
+    pcc->init_proc = init_proc_POWER8;
     pcc->check_pow = check_pow_nocheck;
     pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB |
                        PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
                        PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
+                       PPC_FLOAT_FRSQRTES |
                        PPC_FLOAT_STFIWX |
+                       PPC_FLOAT_EXT |
                        PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
                        PPC_MEM_SYNC | PPC_MEM_EIEIO |
                        PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
-                       PPC_64B | PPC_ALTIVEC |
+                       PPC_64B | PPC_64BX | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
                        PPC_POPCNTB | PPC_POPCNTWD;
-    pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX;
+    pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX |
+                        PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
+                        PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
+                        PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
+                        PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207;
     pcc->msr_mask = 0x800000000284FF36ULL;
     pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
@@ -7987,14 +7847,12 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
                    max_smt, kvm_enabled() ? "KVM" : "TCG");
         return;
     }
+
+    cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
+        + (cs->cpu_index % smp_threads);
 #endif
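/*
 * Editorial note (not part of the patch): worked example of the mapping
 * above, assuming a guest with smp_threads = 2 on a host core whose
 * max_smt = 8: cpu_index 0,1,2,3 -> cpu_dt_id 0,1,8,9.  Guest threads
 * stay packed within a core while the device-tree ids (which KVM also
 * uses) keep the hardware's SMT stride.
 */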
 
-    if (kvm_enabled()) {
-        if (kvmppc_fixup_cpu(cpu) != 0) {
-            error_setg(errp, "Unable to virtualize selected CPU with KVM");
-            return;
-        }
-    } else if (tcg_enabled()) {
+    if (tcg_enabled()) {
         if (ppc_fixup_cpu(cpu) != 0) {
             error_setg(errp, "Unable to emulate selected CPU with TCG");
             return;
@@ -8149,9 +8007,10 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
         }
         printf("PowerPC %-12s : PVR %08x MSR %016" PRIx64 "\n"
                "    MMU model        : %s\n",
-               pcc->name, pcc->pvr, pcc->msr_mask, mmu_model);
+               object_class_get_name(OBJECT_CLASS(pcc)),
+               pcc->pvr, pcc->msr_mask, mmu_model);
 #if !defined(CONFIG_USER_ONLY)
-        if (env->tlb != NULL) {
+        if (env->tlb.tlb6) {
             printf("                       %d %s TLB in %d ways\n",
                    env->nb_tlb, env->id_tlbs ? "splitted" : "merged",
                    env->nb_ways);
@@ -8598,6 +8457,7 @@ static void ppc_cpu_initfn(Object *obj)
 
     cs->env_ptr = env;
     cpu_exec_init(env);
+    cpu->cpu_dt_id = cs->cpu_index;
 
     env->msr_mask = pcc->msr_mask;
     env->mmu_model = pcc->mmu_model;