3 files changed, 688 insertions, 115 deletions
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index bedd3c0da4..79cf375a6d 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -83,7 +83,6 @@ DEF_HELPER_2(cpyse, i64, i64, i64)
 
 DEF_HELPER_1(cvtts, i64, i64)
 DEF_HELPER_1(cvtst, i64, i64)
-DEF_HELPER_1(cvttq, i64, i64)
 DEF_HELPER_1(cvtqs, i64, i64)
 DEF_HELPER_1(cvtqt, i64, i64)
 DEF_HELPER_1(cvtqf, i64, i64)
@@ -91,9 +90,25 @@ DEF_HELPER_1(cvtgf, i64, i64)
 DEF_HELPER_1(cvtgq, i64, i64)
 DEF_HELPER_1(cvtqg, i64, i64)
 DEF_HELPER_1(cvtlq, i64, i64)
+
+DEF_HELPER_1(cvttq, i64, i64)
+DEF_HELPER_1(cvttq_c, i64, i64)
+DEF_HELPER_1(cvttq_svic, i64, i64)
+
 DEF_HELPER_1(cvtql, i64, i64)
-DEF_HELPER_1(cvtqlv, i64, i64)
-DEF_HELPER_1(cvtqlsv, i64, i64)
+DEF_HELPER_1(cvtql_v, i64, i64)
+DEF_HELPER_1(cvtql_sv, i64, i64)
+
+DEF_HELPER_1(setroundmode, void, i32)
+DEF_HELPER_1(setflushzero, void, i32)
+DEF_HELPER_0(fp_exc_clear, void)
+DEF_HELPER_0(fp_exc_get, i32)
+DEF_HELPER_2(fp_exc_raise, void, i32, i32)
+DEF_HELPER_2(fp_exc_raise_s, void, i32, i32)
+
+DEF_HELPER_1(ieee_input, i64, i64)
+DEF_HELPER_1(ieee_input_cmp, i64, i64)
+DEF_HELPER_1(ieee_input_s, i64, i64)
 
 #if !defined (CONFIG_USER_ONLY)
 DEF_HELPER_0(hw_rei, void)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 8bb5d55a23..4d2c2ee58e 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -370,6 +370,130 @@ uint64_t helper_unpkbw (uint64_t op1)
 
 /* Floating point helpers */
 
+void helper_setroundmode (uint32_t val)
+{
+    set_float_rounding_mode(val, &FP_STATUS);
+}
+
+void helper_setflushzero (uint32_t val)
+{
+    set_flush_to_zero(val, &FP_STATUS);
+}
+
+void helper_fp_exc_clear (void)
+{
+    set_float_exception_flags(0, &FP_STATUS);
+}
+
+uint32_t helper_fp_exc_get (void)
+{
+    return get_float_exception_flags(&FP_STATUS);
+}
+
+/* Raise exceptions for ieee fp insns without software completion.
+   In that case there are no exceptions that don't trap; the mask
+   doesn't apply.  */
+void helper_fp_exc_raise(uint32_t exc, uint32_t regno)
+{
+    if (exc) {
+        uint32_t hw_exc = 0;
+
+        env->ipr[IPR_EXC_MASK] |= 1ull << regno;
+
+        if (exc & float_flag_invalid) {
+            hw_exc |= EXC_M_INV;
+        }
+        if (exc & float_flag_divbyzero) {
+            hw_exc |= EXC_M_DZE;
+        }
+        if (exc & float_flag_overflow) {
+            hw_exc |= EXC_M_FOV;
+        }
+        if (exc & float_flag_underflow) {
+            hw_exc |= EXC_M_UNF;
+        }
+        if (exc & float_flag_inexact) {
+            hw_exc |= EXC_M_INE;
+        }
+        helper_excp(EXCP_ARITH, hw_exc);
+    }
+}
+
+/* Raise exceptions for ieee fp insns with software completion.  */
+void helper_fp_exc_raise_s(uint32_t exc, uint32_t regno)
+{
+    if (exc) {
+        env->fpcr_exc_status |= exc;    /* record every flag, masked or not */
+
+        exc &= ~env->fpcr_exc_mask;     /* drop the flags set in the mask */
+        if (exc) {
+            helper_fp_exc_raise(exc, regno);    /* trap on what remains */
+        }
+    }
+}
+
+/* Input remapping without software completion.  Handle denormal-map-to-zero
+   and trap for all other non-finite numbers.  */
+uint64_t helper_ieee_input(uint64_t val)
+{
+    uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+    uint64_t frac = val & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set flush denormals to zero on input.  */
+            if (env->fpcr_dnz) {
+                val &= 1ull << 63;
+            } else {
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+            }
+        }
+    } else if (exp == 0x7ff) {
+        /* Infinity or NaN.  */
+        /* ??? I'm not sure these exception bit flags are correct.  I do
+           know that the Linux kernel, at least, doesn't rely on them and
+           just emulates the insn to figure out what exception to use.  */
+        helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);
+    }
+    return val;
+}
+
+/* Similar, but does not trap for infinities.  Used for comparisons.  */
+uint64_t helper_ieee_input_cmp(uint64_t val)
+{
+    uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+    uint64_t frac = val & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set flush denormals to zero on input.  */
+            if (env->fpcr_dnz) {
+                val &= 1ull << 63;
+            } else {
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+            }
+        }
+    } else if (exp == 0x7ff && frac) {
+        /* NaN.  */
+        helper_excp(EXCP_ARITH, EXC_M_INV);
+    }
+    return val;
+}
+
+/* Input remapping with software completion enabled.  All we have to do
+   is handle denormal-map-to-zero; all other inputs get exceptions as
+   needed from the actual operation.  */
+uint64_t helper_ieee_input_s(uint64_t val)
+{
+    if (env->fpcr_dnz) {
+        uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;  /* biased exponent */
+        if (exp == 0) {         /* zero or denormal */
+            val &= 1ull << 63;  /* keep only the sign bit: +/-0 */
+        }
+    }
+    return val;
+}
+
 /* F floating (VAX) */
 static inline uint64_t float32_to_f(float32 fa)
 {
@@ -447,6 +571,9 @@ uint64_t helper_memory_to_f (uint32_t a)
     return r;
 }
 
+/* ??? Emulating VAX arithmetic with IEEE arithmetic is wrong.  We should
+   either implement VAX arithmetic properly or just signal invalid opcode.  */
+
 uint64_t helper_addf (uint64_t a, uint64_t b)
 {
     float32 fa, fb, fr;
@@ -931,10 +1058,107 @@ uint64_t helper_cvtqs (uint64_t a)
     return float32_to_s(fr);
 }
 
-uint64_t helper_cvttq (uint64_t a)
+/* Convert the IEEE double bit pattern A to a 64-bit integer without
+   saturation: on overflow the low 64 bits of the true result are returned,
+   which lets the compiler get unsigned conversion from the same insn.
+   ROUNDMODE is a float_round_* value.  When VI is nonzero, overflow and
+   inexact are reported through float_raise; invalid (NaN) always is.  */
+
+static inline uint64_t helper_cvttq_internal(uint64_t a, int roundmode, int VI)
 {
-    float64 fa = t_to_float64(a);
-    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
+    uint64_t frac, ret = 0;
+    uint32_t exp, sign, exc = 0;
+    int shift;
+
+    sign = (a >> 63);
+    exp = (uint32_t)(a >> 52) & 0x7ff;
+    frac = a & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (unlikely(frac != 0)) {
+            goto do_underflow;
+        }
+    } else if (exp == 0x7ff) {
+        exc = (frac ? float_flag_invalid : VI ? float_flag_overflow : 0);
+    } else {
+        /* Restore implicit bit.  */
+        frac |= 0x10000000000000ull;
+
+        shift = exp - 1023 - 52;
+        if (shift >= 0) {
+            /* The number is so large that the fraction must be shifted
+               left; no rounding.  Bits shifted out past bit 63 are lost.  */
+            if (shift < 64) {
+                ret = frac << shift;
+            }
+            if (VI && (shift >= 64 || (ret >> shift) != frac)) {
+                exc = float_flag_overflow;
+            }
+        } else {
+            uint64_t round;
+
+            /* In this case the number is smaller than the fraction as
+               represented by the 52 bit number.  Here we must think
+               about rounding the result.  Handle this by shifting the
+               fractional part of the number into the high bits of ROUND.
+               This will let us efficiently handle round-to-nearest.  */
+            shift = -shift;
+            if (shift < 63) {
+                ret = frac >> shift;
+                round = frac << (64 - shift);
+            } else {
+                /* The exponent is so small we shift out everything.
+                   Leave a sticky bit for proper rounding below.  */
+            do_underflow:
+                round = 1;
+            }
+
+            if (round) {
+                exc = (VI ? float_flag_inexact : 0);
+                switch (roundmode) {
+                case float_round_nearest_even:
+                    if (round == (1ull << 63)) {
+                        /* Fraction is exactly 0.5; round to even.  */
+                        ret += (ret & 1);
+                    } else if (round > (1ull << 63)) {
+                        ret += 1;
+                    }
+                    break;
+                case float_round_to_zero:
+                    break;
+                case float_round_up:
+                    ret += 1 - sign;
+                    break;
+                case float_round_down:
+                    ret += sign;
+                    break;
+                }
+            }
+        }
+        if (sign) {
+            ret = -ret;
+        }
+    }
+    if (unlikely(exc)) {
+        float_raise(exc, &FP_STATUS);
+    }
+
+    return ret;
+}
+
+uint64_t helper_cvttq(uint64_t a)
+{
+    return helper_cvttq_internal(a, FP_STATUS.float_rounding_mode, 1);
+}
+
+uint64_t helper_cvttq_c(uint64_t a)
+{
+    return helper_cvttq_internal(a, float_round_to_zero, 0);
+}
+
+uint64_t helper_cvttq_svic(uint64_t a)
+{
+    return helper_cvttq_internal(a, float_round_to_zero, 1);
 }
 
 uint64_t helper_cvtqt (uint64_t a)
@@ -979,35 +1203,24 @@ uint64_t helper_cvtlq (uint64_t a)
     return (lo & 0x3FFFFFFF) | (hi & 0xc0000000);
 }
 
-static inline uint64_t __helper_cvtql(uint64_t a, int s, int v)
-{
-    uint64_t r;
-
-    r = ((uint64_t)(a & 0xC0000000)) << 32;
-    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
-
-    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
-        helper_excp(EXCP_ARITH, EXC_M_IOV);
-    }
-    if (s) {
-        /* TODO */
-    }
-    return r;
-}
-
 uint64_t helper_cvtql (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 0);
+    return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29);
 }
 
-uint64_t helper_cvtqlv (uint64_t a)
+uint64_t helper_cvtql_v (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 1);
+    if ((int32_t)a != (int64_t)a)
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
+    return helper_cvtql(a);
 }
 
-uint64_t helper_cvtqlsv (uint64_t a)
+uint64_t helper_cvtql_sv (uint64_t a)
 {
-    return __helper_cvtql(a, 1, 1);
+    /* ??? I'm pretty sure there's nothing that /sv needs to do that /v
+       doesn't do.  The only thing I can think is that /sv is a valid
+       instruction merely for completeness in the ISA.  */
+    return helper_cvtql_v(a);
 }
 
 /* PALcode support special instructions */
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 515c8c7de5..a11e5ed01c 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -33,6 +33,7 @@
 #include "helper.h"
 
 #undef ALPHA_DEBUG_DISAS
+#define CONFIG_SOFTFLOAT_INLINE
 
 #ifdef ALPHA_DEBUG_DISAS
 #  define LOG_DISAS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
@@ -49,6 +50,11 @@ struct DisasContext {
 #endif
     CPUAlphaState *env;
     uint32_t amask;
+
+    /* Current rounding mode for this TB.  */
+    int tb_rm;
+    /* Current flush-to-zero setting for this TB.  */
+    int tb_ftz;
 };
 
 /* global register indexes */
@@ -442,62 +448,333 @@ static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
     gen_set_label(l1);
 }
 
-#define FARITH2(name)                                       \
-static inline void glue(gen_f, name)(int rb, int rc)        \
-{                                                           \
-    if (unlikely(rc == 31))                                 \
-      return;                                               \
-                                                            \
-    if (rb != 31)                                           \
-        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]);    \
-    else {                                                  \
-        TCGv tmp = tcg_const_i64(0);                        \
-        gen_helper_ ## name (cpu_fir[rc], tmp);            \
-        tcg_temp_free(tmp);                                 \
-    }                                                       \
+#define QUAL_RM_N       0x080   /* Round mode nearest even */
+#define QUAL_RM_C       0x000   /* Round mode chopped */
+#define QUAL_RM_M       0x040   /* Round mode minus infinity */
+#define QUAL_RM_D       0x0c0   /* Round mode dynamic */
+#define QUAL_RM_MASK    0x0c0
+
+#define QUAL_U          0x100   /* Underflow enable (fp output) */
+#define QUAL_V          0x100   /* Overflow enable (int output) */
+#define QUAL_S          0x400   /* Software completion enable */
+#define QUAL_I          0x200   /* Inexact detection enable */
+
+static void gen_qual_roundmode(DisasContext *ctx, int fn11)
+{
+    TCGv_i32 tmp;
+
+    fn11 &= QUAL_RM_MASK;
+    if (fn11 == ctx->tb_rm) {
+        return;
+    }
+    ctx->tb_rm = fn11;
+
+    tmp = tcg_temp_new_i32();
+    switch (fn11) {
+    case QUAL_RM_N:
+        tcg_gen_movi_i32(tmp, float_round_nearest_even);
+        break;
+    case QUAL_RM_C:
+        tcg_gen_movi_i32(tmp, float_round_to_zero);
+        break;
+    case QUAL_RM_M:
+        tcg_gen_movi_i32(tmp, float_round_down);
+        break;
+    case QUAL_RM_D:
+        tcg_gen_ld8u_i32(tmp, cpu_env, offsetof(CPUState, fpcr_dyn_round));
+        break;
+    }
+
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    /* ??? The "softfloat.h" interface is to call set_float_rounding_mode.
+       With CONFIG_SOFTFLOAT that expands to an out-of-line call that just
+       sets the one field.  */
+    tcg_gen_st8_i32(tmp, cpu_env,
+                    offsetof(CPUState, fp_status.float_rounding_mode));
+#else
+    gen_helper_setroundmode(tmp);
+#endif
+
+    tcg_temp_free_i32(tmp);
+}
+
+static void gen_qual_flushzero(DisasContext *ctx, int fn11)
+{
+    TCGv_i32 tmp;
+
+    fn11 &= QUAL_U;
+    if (fn11 == ctx->tb_ftz) {
+        return;
+    }
+    ctx->tb_ftz = fn11;
+
+    tmp = tcg_temp_new_i32();
+    if (fn11) {
+        /* Underflow is enabled, use the FPCR setting.  */
+        tcg_gen_ld8u_i32(tmp, cpu_env, offsetof(CPUState, fpcr_flush_to_zero));
+    } else {
+        /* Underflow is disabled, force flush-to-zero.  */
+        tcg_gen_movi_i32(tmp, 1);
+    }
+
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    tcg_gen_st8_i32(tmp, cpu_env,
+                    offsetof(CPUState, fp_status.flush_to_zero));
+#else
+    gen_helper_setflushzero(tmp);
+#endif
+
+    tcg_temp_free_i32(tmp);
+}
+
+static TCGv gen_ieee_input(int reg, int fn11, int is_cmp)
+{
+    TCGv val = tcg_temp_new();
+    if (reg == 31) {
+        tcg_gen_movi_i64(val, 0);
+    } else if (fn11 & QUAL_S) {
+        gen_helper_ieee_input_s(val, cpu_fir[reg]);
+    } else if (is_cmp) {
+        gen_helper_ieee_input_cmp(val, cpu_fir[reg]);
+    } else {
+        gen_helper_ieee_input(val, cpu_fir[reg]);
+    }
+    return val;
+}
+
+static void gen_fp_exc_clear(void)
+{
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    TCGv_i32 zero = tcg_const_i32(0);
+    tcg_gen_st8_i32(zero, cpu_env,
+                    offsetof(CPUState, fp_status.float_exception_flags));
+    tcg_temp_free_i32(zero);
+#else
+    gen_helper_fp_exc_clear();
+#endif
+}
+
+static void gen_fp_exc_raise_ignore(int rc, int fn11, int ignore)
+{
+    /* ??? We ought to be able to do something with imprecise exceptions.
+       E.g. notice we're still in the trap shadow of something within the
+       TB and do not generate the code to signal the exception; end the TB
+       when an exception is forced to arrive, either by consumption of a
+       register value or TRAPB or EXCB.  */
+    TCGv_i32 exc = tcg_temp_new_i32();
+    TCGv_i32 reg;
+
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    tcg_gen_ld8u_i32(exc, cpu_env,
+                     offsetof(CPUState, fp_status.float_exception_flags));
+#else
+    gen_helper_fp_exc_get(exc);
+#endif
+
+    if (ignore) {
+        tcg_gen_andi_i32(exc, exc, ~ignore);
+    }
+
+    /* ??? Pass in the regno of the destination so that the helper can
+       set EXC_MASK, which contains a bitmask of destination registers
+       that have caused arithmetic traps.  A simple userspace emulation
+       does not require this.  We do need it for a guest kernel's entArith,
+       or if we were to do something clever with imprecise exceptions.  */
+    reg = tcg_const_i32(rc + 32);
+
+    if (fn11 & QUAL_S) {
+        gen_helper_fp_exc_raise_s(exc, reg);
+    } else {
+        gen_helper_fp_exc_raise(exc, reg);
+    }
+
+    tcg_temp_free_i32(reg);
+    tcg_temp_free_i32(exc);
+}
+
+static inline void gen_fp_exc_raise(int rc, int fn11)
+{
+    gen_fp_exc_raise_ignore(rc, fn11, fn11 & QUAL_I ? 0 : float_flag_inexact);
 }
-FARITH2(sqrts)
+
+#define FARITH2(name)                                   \
+static inline void glue(gen_f, name)(int rb, int rc)    \
+{                                                       \
+    if (unlikely(rc == 31)) {                           \
+        return;                                         \
+    }                                                   \
+    if (rb != 31) {                                     \
+        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \
+    } else {						\
+        TCGv tmp = tcg_const_i64(0);                    \
+        gen_helper_ ## name (cpu_fir[rc], tmp);         \
+        tcg_temp_free(tmp);                             \
+    }                                                   \
+}
+FARITH2(cvtlq)
+FARITH2(cvtql)
+FARITH2(cvtql_v)
+FARITH2(cvtql_sv)
+
+/* ??? VAX instruction qualifiers ignored.  */
 FARITH2(sqrtf)
 FARITH2(sqrtg)
-FARITH2(sqrtt)
 FARITH2(cvtgf)
 FARITH2(cvtgq)
 FARITH2(cvtqf)
 FARITH2(cvtqg)
-FARITH2(cvtst)
-FARITH2(cvtts)
-FARITH2(cvttq)
-FARITH2(cvtqs)
-FARITH2(cvtqt)
-FARITH2(cvtlq)
-FARITH2(cvtql)
-FARITH2(cvtqlv)
-FARITH2(cvtqlsv)
-
-#define FARITH3(name)                                                     \
-static inline void glue(gen_f, name)(int ra, int rb, int rc)              \
-{                                                                         \
-    if (unlikely(rc == 31))                                               \
-        return;                                                           \
-                                                                          \
-    if (ra != 31) {                                                       \
-        if (rb != 31)                                                     \
-            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], cpu_fir[rb]);  \
-        else {                                                            \
-            TCGv tmp = tcg_const_i64(0);                                  \
-            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], tmp);          \
-            tcg_temp_free(tmp);                                           \
-        }                                                                 \
-    } else {                                                              \
-        TCGv tmp = tcg_const_i64(0);                                      \
-        if (rb != 31)                                                     \
-            gen_helper_ ## name (cpu_fir[rc], tmp, cpu_fir[rb]);          \
-        else                                                              \
-            gen_helper_ ## name (cpu_fir[rc], tmp, tmp);                   \
-        tcg_temp_free(tmp);                                               \
-    }                                                                     \
+
+static void gen_ieee_arith2(DisasContext *ctx, void (*helper)(TCGv, TCGv),
+                            int rb, int rc, int fn11)
+{
+    TCGv vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_qual_roundmode(ctx, fn11);
+    gen_qual_flushzero(ctx, fn11);
+    gen_fp_exc_clear();
+
+    vb = gen_ieee_input(rb, fn11, 0);
+    helper(cpu_fir[rc], vb);
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise(rc, fn11);
+}
+
+#define IEEE_ARITH2(name)                                       \
+static inline void glue(gen_f, name)(DisasContext *ctx,         \
+                                     int rb, int rc, int fn11)  \
+{                                                               \
+    gen_ieee_arith2(ctx, gen_helper_##name, rb, rc, fn11);      \
+}
+IEEE_ARITH2(sqrts)
+IEEE_ARITH2(sqrtt)
+IEEE_ARITH2(cvtst)
+IEEE_ARITH2(cvtts)
+
+/* Emit TCG ops for CVTTQ (rb -> rc), applying the fn11 qualifiers.  */
+static void gen_fcvttq(DisasContext *ctx, int rb, int rc, int fn11)
+{
+    TCGv vb;
+    int ignore = 0;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+    /* No need to set flushzero, since we have an integer output.  */
+    gen_fp_exc_clear();
+    vb = gen_ieee_input(rb, fn11, 0);
+
+    /* Almost all integer conversions use chopped rounding, and most also
+       lack /V.  Special case that; fn11 holds the opcode too, so mask it.  */
+    switch (fn11 & (QUAL_S | QUAL_I | QUAL_V | QUAL_RM_MASK)) {
+    case QUAL_RM_C:
+        gen_helper_cvttq_c(cpu_fir[rc], vb);
+        break;
+    case QUAL_V | QUAL_RM_C:
+    case QUAL_S | QUAL_V | QUAL_RM_C:
+        ignore = float_flag_inexact;
+        /* FALLTHRU */
+    case QUAL_S | QUAL_V | QUAL_I | QUAL_RM_C:
+        gen_helper_cvttq_svic(cpu_fir[rc], vb);
+        break;
+    default:
+        gen_qual_roundmode(ctx, fn11);
+        gen_helper_cvttq(cpu_fir[rc], vb);
+        ignore |= (fn11 & QUAL_V ? 0 : float_flag_overflow);
+        ignore |= (fn11 & QUAL_I ? 0 : float_flag_inexact);
+        break;
+    }
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise_ignore(rc, fn11, ignore);
 }
 
+static void gen_ieee_intcvt(DisasContext *ctx, void (*helper)(TCGv, TCGv),
+			    int rb, int rc, int fn11)
+{
+    TCGv vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_qual_roundmode(ctx, fn11);
+
+    if (rb == 31) {
+        vb = tcg_const_i64(0);
+    } else {
+        vb = cpu_fir[rb];
+    }
+
+    /* The only exception that can be raised by integer conversion
+       is inexact.  Thus we only need to worry about exceptions when
+       inexact handling is requested.  */
+    if (fn11 & QUAL_I) {
+        gen_fp_exc_clear();
+        helper(cpu_fir[rc], vb);
+        gen_fp_exc_raise(rc, fn11);
+    } else {
+        helper(cpu_fir[rc], vb);
+    }
+
+    if (rb == 31) {
+        tcg_temp_free(vb);
+    }
+}
+
+#define IEEE_INTCVT(name)                                       \
+static inline void glue(gen_f, name)(DisasContext *ctx,         \
+                                     int rb, int rc, int fn11)  \
+{                                                               \
+    gen_ieee_intcvt(ctx, gen_helper_##name, rb, rc, fn11);      \
+}
+IEEE_INTCVT(cvtqs)
+IEEE_INTCVT(cvtqt)
+
+#define FARITH3(name)                                           \
+static inline void glue(gen_f, name)(int ra, int rb, int rc)    \
+{                                                               \
+    TCGv va, vb;                                                \
+                                                                \
+    if (unlikely(rc == 31)) {                                   \
+        return;                                                 \
+    }                                                           \
+    if (ra == 31) {                                             \
+        va = tcg_const_i64(0);                                  \
+    } else {                                                    \
+        va = cpu_fir[ra];                                       \
+    }                                                           \
+    if (rb == 31) {                                             \
+        vb = tcg_const_i64(0);                                  \
+    } else {                                                    \
+        vb = cpu_fir[rb];                                       \
+    }                                                           \
+                                                                \
+    gen_helper_ ## name (cpu_fir[rc], va, vb);                  \
+                                                                \
+    if (ra == 31) {                                             \
+        tcg_temp_free(va);                                      \
+    }                                                           \
+    if (rb == 31) {                                             \
+        tcg_temp_free(vb);                                      \
+    }                                                           \
+}
+/* ??? Ought to expand these inline; simple masking operations.  */
+FARITH3(cpys)
+FARITH3(cpysn)
+FARITH3(cpyse)
+
+/* ??? VAX instruction qualifiers ignored.  */
 FARITH3(addf)
 FARITH3(subf)
 FARITH3(mulf)
@@ -509,21 +786,80 @@ FARITH3(divg)
 FARITH3(cmpgeq)
 FARITH3(cmpglt)
 FARITH3(cmpgle)
-FARITH3(adds)
-FARITH3(subs)
-FARITH3(muls)
-FARITH3(divs)
-FARITH3(addt)
-FARITH3(subt)
-FARITH3(mult)
-FARITH3(divt)
-FARITH3(cmptun)
-FARITH3(cmpteq)
-FARITH3(cmptlt)
-FARITH3(cmptle)
-FARITH3(cpys)
-FARITH3(cpysn)
-FARITH3(cpyse)
+
+static void gen_ieee_arith3(DisasContext *ctx,
+                            void (*helper)(TCGv, TCGv, TCGv),
+                            int ra, int rb, int rc, int fn11)
+{
+    TCGv va, vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_qual_roundmode(ctx, fn11);
+    gen_qual_flushzero(ctx, fn11);
+    gen_fp_exc_clear();
+
+    va = gen_ieee_input(ra, fn11, 0);
+    vb = gen_ieee_input(rb, fn11, 0);
+    helper(cpu_fir[rc], va, vb);
+    tcg_temp_free(va);
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise(rc, fn11);
+}
+
+#define IEEE_ARITH3(name)                                               \
+static inline void glue(gen_f, name)(DisasContext *ctx,                 \
+                                     int ra, int rb, int rc, int fn11)  \
+{                                                                       \
+    gen_ieee_arith3(ctx, gen_helper_##name, ra, rb, rc, fn11);          \
+}
+IEEE_ARITH3(adds)
+IEEE_ARITH3(subs)
+IEEE_ARITH3(muls)
+IEEE_ARITH3(divs)
+IEEE_ARITH3(addt)
+IEEE_ARITH3(subt)
+IEEE_ARITH3(mult)
+IEEE_ARITH3(divt)
+
+static void gen_ieee_compare(DisasContext *ctx,
+                             void (*helper)(TCGv, TCGv, TCGv),
+                             int ra, int rb, int rc, int fn11)
+{
+    TCGv va, vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_fp_exc_clear();
+
+    va = gen_ieee_input(ra, fn11, 1);
+    vb = gen_ieee_input(rb, fn11, 1);
+    helper(cpu_fir[rc], va, vb);
+    tcg_temp_free(va);
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise(rc, fn11);
+}
+
+#define IEEE_CMP3(name)                                                 \
+static inline void glue(gen_f, name)(DisasContext *ctx,                 \
+                                     int ra, int rb, int rc, int fn11)  \
+{                                                                       \
+    gen_ieee_compare(ctx, gen_helper_##name, ra, rb, rc, fn11);         \
+}
+IEEE_CMP3(cmptun)
+IEEE_CMP3(cmpteq)
+IEEE_CMP3(cmptlt)
+IEEE_CMP3(cmptle)
 
 static inline uint64_t zapnot_mask(uint8_t lit)
 {
@@ -1607,7 +1943,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
         }
         break;
     case 0x14:
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x04:
             /* ITOFS */
             if (!(ctx->amask & AMASK_FIX))
@@ -1632,7 +1968,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
             /* SQRTS */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrts(rb, rc);
+            gen_fsqrts(ctx, rb, rc, fn11);
             break;
         case 0x14:
             /* ITOFF */
@@ -1669,7 +2005,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
             /* SQRTT */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrtt(rb, rc);
+            gen_fsqrtt(ctx, rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -1678,7 +2014,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
     case 0x15:
         /* VAX floating point */
         /* XXX: rounding mode and trap are ignored (!) */
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x00:
             /* ADDF */
             gen_faddf(ra, rb, rc);
@@ -1761,77 +2097,75 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
         break;
     case 0x16:
         /* IEEE floating-point */
-        /* XXX: rounding mode and traps are ignored (!) */
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x00:
             /* ADDS */
-            gen_fadds(ra, rb, rc);
+            gen_fadds(ctx, ra, rb, rc, fn11);
             break;
         case 0x01:
             /* SUBS */
-            gen_fsubs(ra, rb, rc);
+            gen_fsubs(ctx, ra, rb, rc, fn11);
             break;
         case 0x02:
             /* MULS */
-            gen_fmuls(ra, rb, rc);
+            gen_fmuls(ctx, ra, rb, rc, fn11);
             break;
         case 0x03:
             /* DIVS */
-            gen_fdivs(ra, rb, rc);
+            gen_fdivs(ctx, ra, rb, rc, fn11);
             break;
         case 0x20:
             /* ADDT */
-            gen_faddt(ra, rb, rc);
+            gen_faddt(ctx, ra, rb, rc, fn11);
             break;
         case 0x21:
             /* SUBT */
-            gen_fsubt(ra, rb, rc);
+            gen_fsubt(ctx, ra, rb, rc, fn11);
             break;
         case 0x22:
             /* MULT */
-            gen_fmult(ra, rb, rc);
+            gen_fmult(ctx, ra, rb, rc, fn11);
             break;
         case 0x23:
             /* DIVT */
-            gen_fdivt(ra, rb, rc);
+            gen_fdivt(ctx, ra, rb, rc, fn11);
             break;
         case 0x24:
             /* CMPTUN */
-            gen_fcmptun(ra, rb, rc);
+            gen_fcmptun(ctx, ra, rb, rc, fn11);
             break;
         case 0x25:
             /* CMPTEQ */
-            gen_fcmpteq(ra, rb, rc);
+            gen_fcmpteq(ctx, ra, rb, rc, fn11);
             break;
         case 0x26:
             /* CMPTLT */
-            gen_fcmptlt(ra, rb, rc);
+            gen_fcmptlt(ctx, ra, rb, rc, fn11);
             break;
         case 0x27:
             /* CMPTLE */
-            gen_fcmptle(ra, rb, rc);
+            gen_fcmptle(ctx, ra, rb, rc, fn11);
             break;
         case 0x2C:
-            /* XXX: incorrect */
             if (fn11 == 0x2AC || fn11 == 0x6AC) {
                 /* CVTST */
-                gen_fcvtst(rb, rc);
+                gen_fcvtst(ctx, rb, rc, fn11);
             } else {
                 /* CVTTS */
-                gen_fcvtts(rb, rc);
+                gen_fcvtts(ctx, rb, rc, fn11);
             }
             break;
         case 0x2F:
             /* CVTTQ */
-            gen_fcvttq(rb, rc);
+            gen_fcvttq(ctx, rb, rc, fn11);
             break;
         case 0x3C:
             /* CVTQS */
-            gen_fcvtqs(rb, rc);
+            gen_fcvtqs(ctx, rb, rc, fn11);
             break;
         case 0x3E:
             /* CVTQT */
-            gen_fcvtqt(rb, rc);
+            gen_fcvtqt(ctx, rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -1910,11 +2244,11 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x130:
             /* CVTQL/V */
-            gen_fcvtqlv(rb, rc);
+            gen_fcvtql_v(rb, rc);
             break;
         case 0x530:
             /* CVTQL/SV */
-            gen_fcvtqlsv(rb, rc);
+            gen_fcvtql_sv(rb, rc);
             break;
         default:
             goto invalid_opc;
@@ -2597,6 +2931,17 @@ static inline void gen_intermediate_code_internal(CPUState *env,
     ctx.mem_idx = ((env->ps >> 3) & 3);
     ctx.pal_mode = env->ipr[IPR_EXC_ADDR] & 1;
 #endif
+
+    /* ??? Every TB begins with unset rounding mode, to be initialized on
+       the first fp insn of the TB.  Alternately we could define a proper
+       default for every TB (e.g. QUAL_RM_N or QUAL_RM_D) and make sure
+       to reset the FP_STATUS to that default at the end of any TB that
+       changes the default.  We could even (gasp) dynamically figure out
+       what default would be most efficient given the running program.  */
+    ctx.tb_rm = -1;
+    /* Similarly for flush-to-zero.  */
+    ctx.tb_ftz = -1;
+
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)