diff options
| author | Richard Henderson <richard.henderson@linaro.org> | 2018-05-10 18:10:58 +0100 |
|---|---|---|
| committer | Peter Maydell <peter.maydell@linaro.org> | 2018-05-10 18:10:58 +0100 |
| commit | 88808a022c06f98d81cd3f2d105a5734c5614839 (patch) | |
| tree | c111f273409068f6aa239cd3fc8cdd136f71bf53 /target/arm/helper.c | |
| parent | d0ba8e74acd299b092786ffc30b306638d395a9e (diff) | |
| download | focaccia-qemu-88808a022c06f98d81cd3f2d105a5734c5614839.tar.gz focaccia-qemu-88808a022c06f98d81cd3f2d105a5734c5614839.zip | |
target/arm: Fix float16 to/from int16
The instruction "ucvtf v0.4h, v0.4h, #2", with input 0x8000u, overflows the intermediate float16 to infinity before we have a chance to scale the output. Use float64 as the intermediate type so that no input argument (uint32_t in this case) can overflow or round before scaling. Given the declared argument, the signed int32_t function has the same problem. When converting from float16 to integer, using u/int32_t instead of u/int16_t means that the bounding is incorrect. Cc: qemu-stable@nongnu.org Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20180502221552.3873-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/helper.c')
| -rw-r--r-- | target/arm/helper.c | 53 |
1 file changed, 51 insertions, 2 deletions
diff --git a/target/arm/helper.c b/target/arm/helper.c index 0fef5d4d06..817f9d81a0 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -11420,11 +11420,60 @@ VFP_CONV_FIX_A64(sq, s, 32, 64, int64) VFP_CONV_FIX(uh, s, 32, 32, uint16) VFP_CONV_FIX(ul, s, 32, 32, uint32) VFP_CONV_FIX_A64(uq, s, 32, 64, uint64) -VFP_CONV_FIX_A64(sl, h, 16, 32, int32) -VFP_CONV_FIX_A64(ul, h, 16, 32, uint32) + #undef VFP_CONV_FIX #undef VFP_CONV_FIX_FLOAT #undef VFP_CONV_FLOAT_FIX_ROUND +#undef VFP_CONV_FIX_A64 + +/* Conversion to/from f16 can overflow to infinity before/after scaling. + * Therefore we convert to f64 (which does not round), scale, + * and then convert f64 to f16 (which may round). + */ + +static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst) +{ + return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst); +} + +float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst) +{ + return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst); +} + +float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst) +{ + return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst); +} + +static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst) +{ + if (unlikely(float16_is_any_nan(f))) { + float_raise(float_flag_invalid, fpst); + return 0; + } else { + int old_exc_flags = get_float_exception_flags(fpst); + float64 ret; + + ret = float16_to_float64(f, true, fpst); + ret = float64_scalbn(ret, shift, fpst); + old_exc_flags |= get_float_exception_flags(fpst) + & float_flag_input_denormal; + set_float_exception_flags(old_exc_flags, fpst); + + return ret; + } +} + +uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst) +{ + return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst); +} + +uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst) +{ + return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst); +} /* Set the current fp rounding mode and return the old one. 
* The argument is a softfloat float_round_ value. |