6 files changed, 237 insertions, 239 deletions
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index d5af64465f..01f5b57fbc 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -55,6 +55,10 @@
 #define ARMV7M_EXCP_PENDSV  14
 #define ARMV7M_EXCP_SYSTICK 15
 
+/* ARM-specific interrupt pending bits.  */
+#define CPU_INTERRUPT_FIQ   CPU_INTERRUPT_TGT_EXT_1
+
+
 typedef void ARMWriteCPFunc(void *opaque, int cp_info,
                             int srcreg, int operand, uint32_t value);
 typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
diff --git a/target-arm/exec.h b/target-arm/exec.h
index 44e1b55aa2..db6608ec8b 100644
--- a/target-arm/exec.h
+++ b/target-arm/exec.h
@@ -21,8 +21,6 @@
 
 register struct CPUARMState *env asm(AREG0);
 
-#define M0   env->iwmmxt.val
-
 #include "cpu.h"
 #include "exec-all.h"
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 62ae72ec27..12084167d6 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -848,6 +848,7 @@ void do_interrupt(CPUARMState *env)
                 return;
             }
         }
+        env->cp15.c5_insn = 2;
         /* Fall through to prefetch abort.  */
     case EXCP_PREFETCH_ABORT:
         new_mode = ARM_CPU_MODE_ABT;
@@ -2355,7 +2356,7 @@ static inline int vfp_exceptbits_from_host(int host_bits)
         target_bits |= 2;
     if (host_bits & float_flag_overflow)
         target_bits |= 4;
-    if (host_bits & float_flag_underflow)
+    if (host_bits & (float_flag_underflow | float_flag_output_denormal))
         target_bits |= 8;
     if (host_bits & float_flag_inexact)
         target_bits |= 0x10;
@@ -2526,99 +2527,39 @@ DO_VFP_cmp(s, float32)
 DO_VFP_cmp(d, float64)
 #undef DO_VFP_cmp
 
-/* Integer to float conversion.  */
-float32 VFP_HELPER(uito, s)(uint32_t x, CPUState *env)
-{
-    return uint32_to_float32(x, &env->vfp.fp_status);
-}
-
-float64 VFP_HELPER(uito, d)(uint32_t x, CPUState *env)
-{
-    return uint32_to_float64(x, &env->vfp.fp_status);
-}
-
-float32 VFP_HELPER(sito, s)(uint32_t x, CPUState *env)
-{
-    return int32_to_float32(x, &env->vfp.fp_status);
-}
+/* Integer to float and float to integer conversions */
 
-float64 VFP_HELPER(sito, d)(uint32_t x, CPUState *env)
-{
-    return int32_to_float64(x, &env->vfp.fp_status);
-}
-
-/* Float to integer conversion.  */
-uint32_t VFP_HELPER(toui, s)(float32 x, CPUState *env)
-{
-    if (float32_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float32_to_uint32(x, &env->vfp.fp_status);
-}
-
-uint32_t VFP_HELPER(toui, d)(float64 x, CPUState *env)
-{
-    if (float64_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float64_to_uint32(x, &env->vfp.fp_status);
-}
-
-uint32_t VFP_HELPER(tosi, s)(float32 x, CPUState *env)
-{
-    if (float32_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float32_to_int32(x, &env->vfp.fp_status);
-}
-
-uint32_t VFP_HELPER(tosi, d)(float64 x, CPUState *env)
-{
-    if (float64_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float64_to_int32(x, &env->vfp.fp_status);
+#define CONV_ITOF(name, fsz, sign) \
+    float##fsz HELPER(name)(uint32_t x, void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    return sign##int32_to_##float##fsz(x, fpst); \
 }
 
-uint32_t VFP_HELPER(touiz, s)(float32 x, CPUState *env)
-{
-    if (float32_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float32_to_uint32_round_to_zero(x, &env->vfp.fp_status);
+#define CONV_FTOI(name, fsz, sign, round) \
+uint32_t HELPER(name)(float##fsz x, void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    if (float##fsz##_is_any_nan(x)) { \
+        float_raise(float_flag_invalid, fpst); \
+        return 0; \
+    } \
+    return float##fsz##_to_##sign##int32##round(x, fpst); \
 }
 
-uint32_t VFP_HELPER(touiz, d)(float64 x, CPUState *env)
-{
-    if (float64_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float64_to_uint32_round_to_zero(x, &env->vfp.fp_status);
-}
+#define FLOAT_CONVS(name, p, fsz, sign) \
+CONV_ITOF(vfp_##name##to##p, fsz, sign) \
+CONV_FTOI(vfp_to##name##p, fsz, sign, ) \
+CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero)
 
-uint32_t VFP_HELPER(tosiz, s)(float32 x, CPUState *env)
-{
-    if (float32_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float32_to_int32_round_to_zero(x, &env->vfp.fp_status);
-}
+FLOAT_CONVS(si, s, 32, )
+FLOAT_CONVS(si, d, 64, )
+FLOAT_CONVS(ui, s, 32, u)
+FLOAT_CONVS(ui, d, 64, u)
 
-uint32_t VFP_HELPER(tosiz, d)(float64 x, CPUState *env)
-{
-    if (float64_is_any_nan(x)) {
-        float_raise(float_flag_invalid, &env->vfp.fp_status);
-        return 0;
-    }
-    return float64_to_int32_round_to_zero(x, &env->vfp.fp_status);
-}
+#undef CONV_ITOF
+#undef CONV_FTOI
+#undef FLOAT_CONVS
 
 /* floating point conversion */
 float64 VFP_HELPER(fcvtd, s)(float32 x, CPUState *env)
@@ -2641,23 +2582,25 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUState *env)
 
 /* VFP3 fixed point conversion.  */
 #define VFP_CONV_FIX(name, p, fsz, itype, sign) \
-float##fsz VFP_HELPER(name##to, p)(uint##fsz##_t  x, uint32_t shift, \
-                                   CPUState *env) \
+float##fsz HELPER(vfp_##name##to##p)(uint##fsz##_t  x, uint32_t shift, \
+                                    void *fpstp) \
 { \
+    float_status *fpst = fpstp; \
     float##fsz tmp; \
-    tmp = sign##int32_to_##float##fsz ((itype##_t)x, &env->vfp.fp_status); \
-    return float##fsz##_scalbn(tmp, -(int)shift, &env->vfp.fp_status); \
+    tmp = sign##int32_to_##float##fsz((itype##_t)x, fpst); \
+    return float##fsz##_scalbn(tmp, -(int)shift, fpst); \
 } \
-uint##fsz##_t VFP_HELPER(to##name, p)(float##fsz x, uint32_t shift, \
-                                      CPUState *env) \
+uint##fsz##_t HELPER(vfp_to##name##p)(float##fsz x, uint32_t shift, \
+                                       void *fpstp) \
 { \
+    float_status *fpst = fpstp; \
     float##fsz tmp; \
     if (float##fsz##_is_any_nan(x)) { \
-        float_raise(float_flag_invalid, &env->vfp.fp_status); \
+        float_raise(float_flag_invalid, fpst); \
         return 0; \
     } \
-    tmp = float##fsz##_scalbn(x, shift, &env->vfp.fp_status); \
-    return float##fsz##_to_##itype##_round_to_zero(tmp, &env->vfp.fp_status); \
+    tmp = float##fsz##_scalbn(x, shift, fpst); \
+    return float##fsz##_to_##itype##_round_to_zero(tmp, fpst); \
 }
 
 VFP_CONV_FIX(sh, d, 64, int16, )
@@ -2720,6 +2663,9 @@ float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
     float_status *s = &env->vfp.standard_fp_status;
     if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
         (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
+        if (!(float32_is_zero(a) || float32_is_zero(b))) {
+            float_raise(float_flag_input_denormal, s);
+        }
         return float32_two;
     }
     return float32_sub(float32_two, float32_mul(a, b, s), s);
@@ -2731,6 +2677,9 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
     float32 product;
     if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
         (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
+        if (!(float32_is_zero(a) || float32_is_zero(b))) {
+            float_raise(float_flag_input_denormal, s);
+        }
         return float32_one_point_five;
     }
     product = float32_mul(a, b, s);
@@ -2749,7 +2698,11 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
  */
 static float64 recip_estimate(float64 a, CPUState *env)
 {
-    float_status *s = &env->vfp.standard_fp_status;
+    /* These calculations mustn't set any fp exception flags,
+     * so we use a local copy of the fp_status.
+     */
+    float_status dummy_status = env->vfp.standard_fp_status;
+    float_status *s = &dummy_status;
     /* q = (int)(a * 512.0) */
     float64 q = float64_mul(float64_512, a, s);
     int64_t q_int = float64_to_int64_round_to_zero(q, s);
@@ -2787,6 +2740,9 @@ float32 HELPER(recpe_f32)(float32 a, CPUState *env)
     } else if (float32_is_infinity(a)) {
         return float32_set_sign(float32_zero, float32_is_neg(a));
     } else if (float32_is_zero_or_denormal(a)) {
+        if (!float32_is_zero(a)) {
+            float_raise(float_flag_input_denormal, s);
+        }
         float_raise(float_flag_divbyzero, s);
         return float32_set_sign(float32_infinity, float32_is_neg(a));
     } else if (a_exp >= 253) {
@@ -2812,7 +2768,11 @@ float32 HELPER(recpe_f32)(float32 a, CPUState *env)
  */
 static float64 recip_sqrt_estimate(float64 a, CPUState *env)
 {
-    float_status *s = &env->vfp.standard_fp_status;
+    /* These calculations mustn't set any fp exception flags,
+     * so we use a local copy of the fp_status.
+     */
+    float_status dummy_status = env->vfp.standard_fp_status;
+    float_status *s = &dummy_status;
     float64 q;
     int64_t q_int;
 
@@ -2874,6 +2834,9 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
         }
         return float32_default_nan;
     } else if (float32_is_zero_or_denormal(a)) {
+        if (!float32_is_zero(a)) {
+            float_raise(float_flag_input_denormal, s);
+        }
         float_raise(float_flag_divbyzero, s);
         return float32_set_sign(float32_infinity, float32_is_neg(a));
     } else if (float32_is_neg(a)) {
diff --git a/target-arm/helper.h b/target-arm/helper.h
index ae701e8451..7d5533f613 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -96,36 +96,36 @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
 DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
 DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
 
-DEF_HELPER_2(vfp_uitos, f32, i32, env)
-DEF_HELPER_2(vfp_uitod, f64, i32, env)
-DEF_HELPER_2(vfp_sitos, f32, i32, env)
-DEF_HELPER_2(vfp_sitod, f64, i32, env)
-
-DEF_HELPER_2(vfp_touis, i32, f32, env)
-DEF_HELPER_2(vfp_touid, i32, f64, env)
-DEF_HELPER_2(vfp_touizs, i32, f32, env)
-DEF_HELPER_2(vfp_touizd, i32, f64, env)
-DEF_HELPER_2(vfp_tosis, i32, f32, env)
-DEF_HELPER_2(vfp_tosid, i32, f64, env)
-DEF_HELPER_2(vfp_tosizs, i32, f32, env)
-DEF_HELPER_2(vfp_tosizd, i32, f64, env)
-
-DEF_HELPER_3(vfp_toshs, i32, f32, i32, env)
-DEF_HELPER_3(vfp_tosls, i32, f32, i32, env)
-DEF_HELPER_3(vfp_touhs, i32, f32, i32, env)
-DEF_HELPER_3(vfp_touls, i32, f32, i32, env)
-DEF_HELPER_3(vfp_toshd, i64, f64, i32, env)
-DEF_HELPER_3(vfp_tosld, i64, f64, i32, env)
-DEF_HELPER_3(vfp_touhd, i64, f64, i32, env)
-DEF_HELPER_3(vfp_tould, i64, f64, i32, env)
-DEF_HELPER_3(vfp_shtos, f32, i32, i32, env)
-DEF_HELPER_3(vfp_sltos, f32, i32, i32, env)
-DEF_HELPER_3(vfp_uhtos, f32, i32, i32, env)
-DEF_HELPER_3(vfp_ultos, f32, i32, i32, env)
-DEF_HELPER_3(vfp_shtod, f64, i64, i32, env)
-DEF_HELPER_3(vfp_sltod, f64, i64, i32, env)
-DEF_HELPER_3(vfp_uhtod, f64, i64, i32, env)
-DEF_HELPER_3(vfp_ultod, f64, i64, i32, env)
+DEF_HELPER_2(vfp_uitos, f32, i32, ptr)
+DEF_HELPER_2(vfp_uitod, f64, i32, ptr)
+DEF_HELPER_2(vfp_sitos, f32, i32, ptr)
+DEF_HELPER_2(vfp_sitod, f64, i32, ptr)
+
+DEF_HELPER_2(vfp_touis, i32, f32, ptr)
+DEF_HELPER_2(vfp_touid, i32, f64, ptr)
+DEF_HELPER_2(vfp_touizs, i32, f32, ptr)
+DEF_HELPER_2(vfp_touizd, i32, f64, ptr)
+DEF_HELPER_2(vfp_tosis, i32, f32, ptr)
+DEF_HELPER_2(vfp_tosid, i32, f64, ptr)
+DEF_HELPER_2(vfp_tosizs, i32, f32, ptr)
+DEF_HELPER_2(vfp_tosizd, i32, f64, ptr)
+
+DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_touhs, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_touls, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_toshd, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_tosld, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_touhd, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_tould, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_shtos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_sltos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_uhtos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_ultos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_shtod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_sltod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
 
 DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
 DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index f5b173aa71..9165519236 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1802,41 +1802,37 @@ uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b)
     return float32_val(float32_mul(make_float32(a), make_float32(b), NFS));
 }
 
-/* Floating point comparisons produce an integer result.  */
-#define NEON_VOP_FCMP(name, ok) \
-uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \
-{ \
-    switch (float32_compare_quiet(make_float32(a), make_float32(b), NFS)) { \
-    ok return ~0; \
-    default: return 0; \
-    } \
+/* Floating point comparisons produce an integer result.
+ * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
+ * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
+ */
+uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b)
+{
+    return -float32_eq_quiet(make_float32(a), make_float32(b), NFS);
+}
+
+uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b)
+{
+    return -float32_le(make_float32(b), make_float32(a), NFS);
 }
 
-NEON_VOP_FCMP(ceq_f32, case float_relation_equal:)
-NEON_VOP_FCMP(cge_f32, case float_relation_equal: case float_relation_greater:)
-NEON_VOP_FCMP(cgt_f32, case float_relation_greater:)
+uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b)
+{
+    return -float32_lt(make_float32(b), make_float32(a), NFS);
+}
 
 uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b)
 {
     float32 f0 = float32_abs(make_float32(a));
     float32 f1 = float32_abs(make_float32(b));
-    switch (float32_compare_quiet(f0, f1, NFS)) {
-    case float_relation_equal:
-    case float_relation_greater:
-        return ~0;
-    default:
-        return 0;
-    }
+    return -float32_le(f1, f0, NFS);
 }
 
 uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b)
 {
     float32 f0 = float32_abs(make_float32(a));
     float32 f1 = float32_abs(make_float32(b));
-    if (float32_compare_quiet(f0, f1, NFS) == float_relation_greater) {
-        return ~0;
-    }
-    return 0;
+    return -float32_lt(f1, f0, NFS);
 }
 
 #define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
diff --git a/target-arm/translate.c b/target-arm/translate.c
index a1af436e34..f5507ec3b6 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -909,6 +909,26 @@ VFP_OP2(div)
 
 #undef VFP_OP2
 
+static inline void gen_vfp_F1_mul(int dp)
+{
+    /* Like gen_vfp_mul() but put result in F1 */
+    if (dp) {
+        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, cpu_env);
+    } else {
+        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, cpu_env);
+    }
+}
+
+static inline void gen_vfp_F1_neg(int dp)
+{
+    /* Like gen_vfp_neg() but put result in F1 */
+    if (dp) {
+        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
+    } else {
+        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
+    }
+}
+
 static inline void gen_vfp_abs(int dp)
 {
     if (dp)
@@ -957,63 +977,73 @@ static inline void gen_vfp_F1_ld0(int dp)
         tcg_gen_movi_i32(cpu_F1s, 0);
 }
 
-static inline void gen_vfp_uito(int dp)
-{
-    if (dp)
-        gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env);
-    else
-        gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env);
-}
-
-static inline void gen_vfp_sito(int dp)
-{
-    if (dp)
-        gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env);
-    else
-        gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env);
+#define VFP_GEN_ITOF(name) \
+static inline void gen_vfp_##name(int dp, int neon) \
+{ \
+    TCGv_ptr statusptr = tcg_temp_new_ptr(); \
+    int offset; \
+    if (neon) { \
+        offset = offsetof(CPUState, vfp.standard_fp_status); \
+    } else { \
+        offset = offsetof(CPUState, vfp.fp_status); \
+    } \
+    tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+    if (dp) { \
+        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
+    } else { \
+        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
+    } \
+    tcg_temp_free_ptr(statusptr); \
 }
 
-static inline void gen_vfp_toui(int dp)
-{
-    if (dp)
-        gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env);
-    else
-        gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env);
-}
+VFP_GEN_ITOF(uito)
+VFP_GEN_ITOF(sito)
+#undef VFP_GEN_ITOF
 
-static inline void gen_vfp_touiz(int dp)
-{
-    if (dp)
-        gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env);
-    else
-        gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env);
-}
-
-static inline void gen_vfp_tosi(int dp)
-{
-    if (dp)
-        gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env);
-    else
-        gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env);
+#define VFP_GEN_FTOI(name) \
+static inline void gen_vfp_##name(int dp, int neon) \
+{ \
+    TCGv_ptr statusptr = tcg_temp_new_ptr(); \
+    int offset; \
+    if (neon) { \
+        offset = offsetof(CPUState, vfp.standard_fp_status); \
+    } else { \
+        offset = offsetof(CPUState, vfp.fp_status); \
+    } \
+    tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+    if (dp) { \
+        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
+    } else { \
+        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
+    } \
+    tcg_temp_free_ptr(statusptr); \
 }
 
-static inline void gen_vfp_tosiz(int dp)
-{
-    if (dp)
-        gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env);
-    else
-        gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env);
-}
+VFP_GEN_FTOI(toui)
+VFP_GEN_FTOI(touiz)
+VFP_GEN_FTOI(tosi)
+VFP_GEN_FTOI(tosiz)
+#undef VFP_GEN_FTOI
 
 #define VFP_GEN_FIX(name) \
-static inline void gen_vfp_##name(int dp, int shift) \
+static inline void gen_vfp_##name(int dp, int shift, int neon) \
 { \
     TCGv tmp_shift = tcg_const_i32(shift); \
-    if (dp) \
-        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, cpu_env);\
-    else \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, cpu_env);\
+    TCGv_ptr statusptr = tcg_temp_new_ptr(); \
+    int offset; \
+    if (neon) { \
+        offset = offsetof(CPUState, vfp.standard_fp_status); \
+    } else { \
+        offset = offsetof(CPUState, vfp.fp_status); \
+    } \
+    tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+    if (dp) { \
+        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \
+    } else { \
+        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \
+    } \
     tcg_temp_free_i32(tmp_shift); \
+    tcg_temp_free_ptr(statusptr); \
 }
 VFP_GEN_FIX(tosh)
 VFP_GEN_FIX(tosl)
@@ -1331,7 +1361,7 @@ static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest)
     return 0;
 }
 
-/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occured
+/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
    (ie. an undefined instruction).  */
 static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
 {
@@ -2335,7 +2365,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
     return 0;
 }
 
-/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occured
+/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
    (ie. an undefined instruction).  */
 static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
 {
@@ -2681,7 +2711,7 @@ static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size)
     return tmp;
 }
 
-/* Disassemble a VFP instruction.  Returns nonzero if an error occured
+/* Disassemble a VFP instruction.  Returns nonzero if an error occurred
    (ie. an undefined instruction).  */
 static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
 {
@@ -3021,27 +3051,34 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
             for (;;) {
                 /* Perform the calculation.  */
                 switch (op) {
-                case 0: /* mac: fd + (fn * fm) */
-                    gen_vfp_mul(dp);
-                    gen_mov_F1_vreg(dp, rd);
+                case 0: /* VMLA: fd + (fn * fm) */
+                    /* Note that order of inputs to the add matters for NaNs */
+                    gen_vfp_F1_mul(dp);
+                    gen_mov_F0_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
-                case 1: /* nmac: fd - (fn * fm) */
+                case 1: /* VMLS: fd + -(fn * fm) */
                     gen_vfp_mul(dp);
-                    gen_vfp_neg(dp);
-                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_F1_neg(dp);
+                    gen_mov_F0_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
-                case 2: /* msc: -fd + (fn * fm) */
-                    gen_vfp_mul(dp);
-                    gen_mov_F1_vreg(dp, rd);
-                    gen_vfp_sub(dp);
+                case 2: /* VNMLS: -fd + (fn * fm) */
+                    /* Note that it isn't valid to replace (-A + B) with (B - A)
+                     * or similar plausible looking simplifications
+                     * because this will give wrong results for NaNs.
+                     */
+                    gen_vfp_F1_mul(dp);
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_vfp_neg(dp);
+                    gen_vfp_add(dp);
                     break;
-                case 3: /* nmsc: -fd - (fn * fm)  */
+                case 3: /* VNMLA: -fd + -(fn * fm) */
                     gen_vfp_mul(dp);
+                    gen_vfp_F1_neg(dp);
+                    gen_mov_F0_vreg(dp, rd);
                     gen_vfp_neg(dp);
-                    gen_mov_F1_vreg(dp, rd);
-                    gen_vfp_sub(dp);
+                    gen_vfp_add(dp);
                     break;
                 case 4: /* mul: fn * fm */
                     gen_vfp_mul(dp);
@@ -3156,62 +3193,62 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
                         break;
                     case 16: /* fuito */
-                        gen_vfp_uito(dp);
+                        gen_vfp_uito(dp, 0);
                         break;
                     case 17: /* fsito */
-                        gen_vfp_sito(dp);
+                        gen_vfp_sito(dp, 0);
                         break;
                     case 20: /* fshto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_shto(dp, 16 - rm);
+                        gen_vfp_shto(dp, 16 - rm, 0);
                         break;
                     case 21: /* fslto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_slto(dp, 32 - rm);
+                        gen_vfp_slto(dp, 32 - rm, 0);
                         break;
                     case 22: /* fuhto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_uhto(dp, 16 - rm);
+                        gen_vfp_uhto(dp, 16 - rm, 0);
                         break;
                     case 23: /* fulto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_ulto(dp, 32 - rm);
+                        gen_vfp_ulto(dp, 32 - rm, 0);
                         break;
                     case 24: /* ftoui */
-                        gen_vfp_toui(dp);
+                        gen_vfp_toui(dp, 0);
                         break;
                     case 25: /* ftouiz */
-                        gen_vfp_touiz(dp);
+                        gen_vfp_touiz(dp, 0);
                         break;
                     case 26: /* ftosi */
-                        gen_vfp_tosi(dp);
+                        gen_vfp_tosi(dp, 0);
                         break;
                     case 27: /* ftosiz */
-                        gen_vfp_tosiz(dp);
+                        gen_vfp_tosiz(dp, 0);
                         break;
                     case 28: /* ftosh */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_tosh(dp, 16 - rm);
+                        gen_vfp_tosh(dp, 16 - rm, 0);
                         break;
                     case 29: /* ftosl */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_tosl(dp, 32 - rm);
+                        gen_vfp_tosl(dp, 32 - rm, 0);
                         break;
                     case 30: /* ftouh */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_touh(dp, 16 - rm);
+                        gen_vfp_touh(dp, 16 - rm, 0);
                         break;
                     case 31: /* ftoul */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_toul(dp, 32 - rm);
+                        gen_vfp_toul(dp, 32 - rm, 0);
                         break;
                     default: /* undefined */
                         printf ("rn:%d\n", rn);
@@ -5224,14 +5261,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
                     if (!(op & 1)) {
                         if (u)
-                            gen_vfp_ulto(0, shift);
+                            gen_vfp_ulto(0, shift, 1);
                         else
-                            gen_vfp_slto(0, shift);
+                            gen_vfp_slto(0, shift, 1);
                     } else {
                         if (u)
-                            gen_vfp_toul(0, shift);
+                            gen_vfp_toul(0, shift, 1);
                         else
-                            gen_vfp_tosl(0, shift);
+                            gen_vfp_tosl(0, shift, 1);
                     }
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
                 }
@@ -6044,16 +6081,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
                             break;
                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
-                            gen_vfp_sito(0);
+                            gen_vfp_sito(0, 1);
                             break;
                         case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
-                            gen_vfp_uito(0);
+                            gen_vfp_uito(0, 1);
                             break;
                         case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
-                            gen_vfp_tosiz(0);
+                            gen_vfp_tosiz(0, 1);
                             break;
                         case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
-                            gen_vfp_touiz(0);
+                            gen_vfp_touiz(0, 1);
                             break;
                         default:
                             /* Reserved op values were caught by the
@@ -7348,7 +7385,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                     } else if ((insn & 0x000003e0) == 0x00000060) {
                         tmp = load_reg(s, rm);
                         shift = (insn >> 10) & 3;
-                        /* ??? In many cases it's not neccessary to do a
+                        /* ??? In many cases it's not necessary to do a
                            rotate, a shift is sufficient.  */
                         if (shift != 0)
                             tcg_gen_rotri_i32(tmp, tmp, shift * 8);
@@ -8139,7 +8176,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
         case 1: /* Sign/zero extend.  */
             tmp = load_reg(s, rm);
             shift = (insn >> 4) & 3;
-            /* ??? In many cases it's not neccessary to do a
+            /* ??? In many cases it's not necessary to do a
                rotate, a shift is sufficient.  */
             if (shift != 0)
                 tcg_gen_rotri_i32(tmp, tmp, shift * 8);