Diffstat (limited to 'results/classifier/118/all/1841592')
| -rw-r--r-- | results/classifier/118/all/1841592 | 609 |
1 files changed, 609 insertions, 0 deletions
diff --git a/results/classifier/118/all/1841592 b/results/classifier/118/all/1841592
new file mode 100644
index 000000000..d660c7549
--- /dev/null
+++ b/results/classifier/118/all/1841592
@@ -0,0 +1,609 @@
+user-level: 0.945
+permissions: 0.935
+peripherals: 0.918
+debug: 0.912
+register: 0.909
+semantic: 0.906
+ppc: 0.904
+vnc: 0.904
+TCG: 0.901
+KVM: 0.897
+socket: 0.886
+PID: 0.884
+mistranslation: 0.882
+architecture: 0.879
+hypervisor: 0.875
+VMM: 0.862
+graphic: 0.860
+kernel: 0.857
+device: 0.857
+boot: 0.856
+assembly: 0.853
+performance: 0.848
+arm: 0.843
+risc-v: 0.837
+network: 0.818
+x86: 0.807
+files: 0.790
+virtual: 0.784
+i386: 0.732
+
+ppc: softfloat float implementation issues
+
+Per bug #1841491, Richard Henderson (rth) said:
+> The float test failure is part of a larger problem for target/powerpc in which all float
+> routines are implemented incorrectly. They are all implemented as double operations with
+> rounding to float as a second step. Which not only produces incorrect exceptions, as in
+> this case, but incorrect numerical results from the double rounding.
+>
+> This should probably be split to a separate bug...
+
+
+
+-- ppc64le native:
+$ gcc -c -O2 ffma.c
+$ gcc -O2 test-ffma.c ffma.o -lm -o test-ffma
+$ ./test-ffma $(./test-ffma)
+ffma(0x1p-149, 0x1p-149, 0x1p-149)
+0x0
+
+0xa000000
+FE_INEXACT FE_UNDERFLOW
+0x1p-149
+
+-- qemu-system-ppc64:
+$ ./test-ffma $(./test-ffma)
+ffma(0x1p-149, 0x1p-149, 0x1p-149)
+0x0
+
+0x2000000
+FE_INEXACT
+0x1p-149
+
+
+I'm confused by this testcase as it's not a fused multiply-add but, as you say, two combined operations.
+
+It should be a fused multiply add; you may need to use -ffast-math or
+something to get the compiler to generate the proper instruction.
+
+However, one can see from target/ppc/translate/fp-impl.inc.c:
+
+/* fmadd - fmadds */
+GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT);
+
+through to _GEN_FLOAT_ACB:
+
+    gen_helper_f##op(t3, cpu_env, t0, t1, t2); \
+    if (isfloat) { \
+        gen_helper_frsp(t3, cpu_env, t3); \
+    } \
+
+That right there is a double-precision fma followed by a round
+to single precision. This pattern is replicated for all single
+precision operations, and is of course wrong.
+
+I believe that correct results may be obtained by having
+single-precision helpers that first convert the double-precision
+input into a single-precision input using helper_tosingle(),
+perform the required operation, then convert the result back to
+double-precision using helper_todouble().
+
+The manual says:
+
+# For single-precision arithmetic instructions, all input values
+# must be representable in single format; if they are not, the
+# result placed into the target FPR, and the setting of
+# status bits in the FPSCR and in the Condition Register
+# (if Rc=1), are undefined.
+
+The tosingle/todouble conversions are exact and bit-preserving.
+They are used by load-single and store-single that convert a
+single-precision in-memory value to the double-precision register
+value. Therefore the input given to float32_add using this
+conversion would be exactly the same as if we had given the
+value unmolested from a memory input.
+
+I don't know what real ppc hw does -- whether it takes all of
+the double-precision input bits and rounds to 23 bits, like the
+old 80387 hardware does, or truncates the input as I propose.
+But for architectural results we don't have to care, because
+of the UNDEFINED escape clause.
+
+Testing on current master shows the behavior is correct. I guess rth's patch fixed this case.
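For reference, a rough sketch of the helper shape rth describes above -- narrow the 64-bit
register images with helper_tosingle(), do the arithmetic at single precision, then widen the
result back with helper_todouble(). The helper name and wiring here are hypothetical; this is
not the code that was eventually merged, just an illustration of the approach:

    /* Hypothetical single-precision fmadd helper, sketched from the
     * description above (inside target/ppc/fpu_helper.c, so cpu.h and
     * the softfloat headers are assumed available). Not the actual
     * QEMU implementation.
     */
    uint64_t helper_fmadds_sketch(CPUPPCState *env, uint64_t arg1,
                                  uint64_t arg2, uint64_t arg3)
    {
        float32 a = helper_tosingle(arg1);  /* exact, bit-preserving narrowing */
        float32 b = helper_tosingle(arg2);
        float32 c = helper_tosingle(arg3);
        float32 r = float32_muladd(a, b, c, 0, &env->fp_status);

        return helper_todouble(r);          /* exact widening back to the FPR format */
    }

Because tosingle/todouble are exact, the float32 arithmetic sees the same operands it would
have seen had the values come straight from a single-precision load, which is what makes the
UNDEFINED escape clause above sufficient.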
+
+It looks like the test case isn't properly exercising the code that is likely to be wrong. It sounds like we need a proper comprehensive testcase for fused operations (along the lines of the ARM fcvt test case). This can probably be a multiarch testcase which we can build for all the various targets.
+
+This is a generic floating point multiply and accumulate test for
+single precision floating point values. I've split off the common float
+functions into a helper library so additional tests can use the same
+common code.
+
+Signed-off-by: Alex Bennée <email address hidden>
+---
+ tests/tcg/multiarch/Makefile.target |   7 +-
+ tests/tcg/multiarch/float_helpers.c | 208 ++++++++++++++++++++++++++++
+ tests/tcg/multiarch/float_helpers.h |  26 ++++
+ tests/tcg/multiarch/float_madds.c   |  78 +++++++++++
+ 4 files changed, 318 insertions(+), 1 deletion(-)
+ create mode 100644 tests/tcg/multiarch/float_helpers.c
+ create mode 100644 tests/tcg/multiarch/float_helpers.h
+ create mode 100644 tests/tcg/multiarch/float_madds.c
+
+diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
+index 657a04f802d..0446b75c456 100644
+--- a/tests/tcg/multiarch/Makefile.target
++++ b/tests/tcg/multiarch/Makefile.target
+@@ -10,12 +10,17 @@ MULTIARCH_SRC=$(SRC_PATH)/tests/tcg/multiarch
+ # Set search path for all sources
+ VPATH += $(MULTIARCH_SRC)
+ MULTIARCH_SRCS =$(notdir $(wildcard $(MULTIARCH_SRC)/*.c))
+-MULTIARCH_TESTS =$(MULTIARCH_SRCS:.c=)
++MULTIARCH_TESTS =$(filter-out float_helpers, $(MULTIARCH_SRCS:.c=))
+ 
+ #
+ # The following are any additional rules needed to build things
+ #
+ 
++
++float_madds: LDFLAGS+=-lm
++float_madds: float_madds.c float_helpers.c
++	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -O2 $< $(MULTIARCH_SRC)/float_helpers.c -o $@ $(LDFLAGS)
++
+ testthread: LDFLAGS+=-lpthread
+ 
+ # We define the runner for test-mmap after the individual
+diff --git a/tests/tcg/multiarch/float_helpers.c b/tests/tcg/multiarch/float_helpers.c
+new file mode 100644
+index 00000000000..481d8d33317
+--- /dev/null
++++ b/tests/tcg/multiarch/float_helpers.c
+@@ -0,0 +1,208 @@
++/*
++ * Common Float Helpers
++ *
++ * This contains a series of useful utility routines and a set of
++ * floating point constants useful for exercising the edge cases in
++ * floating point tests.
++ *
++ * Copyright (c) 2019 Linaro
++ *
++ * SPDX-License-Identifier: GPL-3.0-or-later
++ */
++
++/* we want additional float type definitions */
++#define __STDC_WANT_IEC_60559_BFP_EXT__
++#define __STDC_WANT_IEC_60559_TYPES_EXT__
++
++#define _GNU_SOURCE
++#include <stdio.h>
++#include <inttypes.h>
++#include <math.h>
++#include <float.h>
++#include <fenv.h>
++
++#include "float_helpers.h"
++
++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
++
++/*
++ * Half Precision Numbers
++ *
++ * Not yet well standardised so we return a plain uint16_t for now.
++ */
++
++/* no handy defines for these numbers */
++static uint16_t f16_numbers[] = {
++    0xffff, /* -NaN / AHP -Max */
++    0xfcff, /* -NaN / AHP */
++    0xfc01, /* -NaN / AHP */
++    0xfc00, /* -Inf */
++    0xfbff, /* -Max */
++    0xc000, /* -2 */
++    0xbc00, /* -1 */
++    0x8001, /* -MIN subnormal */
++    0x8000, /* -0 */
++    0x0000, /* +0 */
++    0x0001, /* MIN subnormal */
++    0x3c00, /* 1 */
++    0x7bff, /* Max */
++    0x7c00, /* Inf */
++    0x7c01, /* NaN / AHP */
++    0x7cff, /* NaN / AHP */
++    0x7fff, /* NaN / AHP +Max */
++};
++
++const int num_f16 = ARRAY_SIZE(f16_numbers);
++
++uint16_t get_f16(int i) {
++    return f16_numbers[i % num_f16];
++}
++
++/* only display as hex */
++char *fmt_f16(uint16_t num) {
++    char *fmt;
++    asprintf(&fmt, "f16 %#04x", num);
++    return fmt;
++}
++
++/*
++ * Single Precision Numbers
++ */
++
++#ifndef SNANF
++/* Signaling NaN macros, if supported. */
++# if __GNUC_PREREQ(3, 3)
++#  define SNANF (__builtin_nansf (""))
++#  define SNAN (__builtin_nans (""))
++#  define SNANL (__builtin_nansl (""))
++# endif
++#endif
++
++static float f32_numbers[] = {
++    -SNANF,
++    -NAN,
++    -INFINITY,
++    -FLT_MAX,
++    -1.111E+31,
++    -1.111E+30,
++    -1.08700982e-12,
++    -1.78051176e-20,
++    -FLT_MIN,
++    0.0,
++    FLT_MIN,
++    2.98023224e-08,
++    5.96046E-8, /* min positive FP16 subnormal */
++    6.09756E-5, /* max subnormal FP16 */
++    6.10352E-5, /* min positive normal FP16 */
++    1.0,
++    1.0009765625, /* smallest float after 1.0 FP16 */
++    2.0,
++    M_E, M_PI,
++    65503.0,
++    65504.0, /* max FP16 */
++    65505.0,
++    131007.0,
++    131008.0, /* max AFP */
++    131009.0,
++    1.111E+30,
++    FLT_MAX,
++    INFINITY,
++    NAN,
++    SNANF
++};
++
++const int num_f32 = ARRAY_SIZE(f32_numbers);
++
++float get_f32(int i) {
++    return f32_numbers[i % num_f32];
++}
++
++char *fmt_f32(float num) {
++    uint32_t single_as_hex = *(uint32_t *) &num;
++    char *fmt;
++    asprintf(&fmt, "f32 %02.20e / %#010x", num, single_as_hex);
++    return fmt;
++}
++
++
++/* This allows us to initialise some doubles as pure hex */
++typedef union {
++    double d;
++    uint64_t h;
++} test_doubles;
++
++static test_doubles f64_numbers[] = {
++    {SNAN},
++    {-NAN},
++    {-INFINITY},
++    {-DBL_MAX},
++    {-FLT_MAX-1.0},
++    {-FLT_MAX},
++    {-1.111E+31},
++    {-1.111E+30}, /* half prec */
++    {-2.0}, {-1.0},
++    {-DBL_MIN},
++    {-FLT_MIN},
++    {0.0},
++    {FLT_MIN},
++    {2.98023224e-08},
++    {5.96046E-8}, /* min positive FP16 subnormal */
++    {6.09756E-5}, /* max subnormal FP16 */
++    {6.10352E-5}, /* min positive normal FP16 */
++    {1.0},
++    {1.0009765625}, /* smallest float after 1.0 FP16 */
++    {DBL_MIN},
++    {1.3789972848607228e-308},
++    {1.4914738736681624e-308},
++    {1.0}, {2.0},
++    {M_E}, {M_PI},
++    {65503.0},
++    {65504.0}, /* max FP16 */
++    {65505.0},
++    {131007.0},
++    {131008.0}, /* max AFP */
++    {131009.0},
++    {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */
++    {FLT_MAX},
++    {FLT_MAX + 1.0},
++    {DBL_MAX},
++    {INFINITY},
++    {NAN},
++    {.h = 0x7ff0000000000001}, /* SNAN */
++    {SNAN},
++};
++
++const int num_f64 = ARRAY_SIZE(f64_numbers);
++
++double get_f64(int i) {
++    return f64_numbers[i % num_f64].d;
++}
++
++char *fmt_f64(double num) {
++    uint64_t double_as_hex = *(uint64_t *) &num;
++    char *fmt;
++    asprintf(&fmt, "f64 %02.20e / %#020" PRIx64, num, double_as_hex);
++    return fmt;
++}
++
++/*
++ * Float flags
++ */
++char *fmt_flags(void)
++{
++    int flags = fetestexcept(FE_ALL_EXCEPT);
++    char *fmt;
++
++    if (flags) {
++        asprintf(&fmt, "%s%s%s%s%s",
++                 flags & FE_OVERFLOW ? "OVERFLOW " : "",
++                 flags & FE_UNDERFLOW ? "UNDERFLOW " : "",
++                 flags & FE_DIVBYZERO ? "DIV0 " : "",
"DIV0 " : "", ++ flags & FE_INEXACT ? "INEXACT " : "", ++ flags & FE_INVALID ? "INVALID" : ""); ++ } else { ++ asprintf(&fmt, "OK"); ++ } ++ ++ return fmt; ++} +diff --git a/tests/tcg/multiarch/float_helpers.h b/tests/tcg/multiarch/float_helpers.h +new file mode 100644 +index 00000000000..4a1e2f3853a +--- /dev/null ++++ b/tests/tcg/multiarch/float_helpers.h +@@ -0,0 +1,26 @@ ++/* ++ * Common Float Helpers ++ * ++ * Copyright (c) 2019 Linaro ++ * ++ * SPDX-License-Identifier: GPL-3.0-or-later ++ */ ++ ++#include <inttypes.h> ++ ++/* Number of constants in each table */ ++extern const int num_f16; ++extern const int num_f32; ++extern const int num_f64; ++ ++/* Accessor helpers */ ++uint16_t get_f16(int i); /* use _Float16 when we can */ ++float get_f32(int i); ++double get_f64(int i); ++ ++/* Return format strings, free after use */ ++char * fmt_f16(uint16_t); ++char * fmt_f32(float); ++char * fmt_f64(double); ++/* exception flags */ ++char * fmt_flags(void); +diff --git a/tests/tcg/multiarch/float_madds.c b/tests/tcg/multiarch/float_madds.c +new file mode 100644 +index 00000000000..bc11eea9084 +--- /dev/null ++++ b/tests/tcg/multiarch/float_madds.c +@@ -0,0 +1,78 @@ ++/* ++ * Fused Multiply Add (Single) ++ * ++ * Copyright (c) 2019 Linaro ++ * ++ * SPDX-License-Identifier: GPL-3.0-or-later ++ */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <math.h> ++#include <float.h> ++#include <fenv.h> ++ ++#include "float_helpers.h" ++ ++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) ++ ++typedef struct { ++ int flag; ++ char *desc; ++} float_mapping; ++ ++float_mapping round_flags[] = { ++ { FE_TONEAREST, "to nearest" }, ++ { FE_UPWARD, "upwards" }, ++ { FE_DOWNWARD, "downwards" }, ++ { FE_TOWARDZERO, "to zero" } ++}; ++ ++void print_result(float a, float b, float c, float r) ++{ ++ char *a_fmt, *b_fmt, *c_fmt, *r_fmt, *flag_fmt; ++ ++ a_fmt = fmt_f32(a); ++ b_fmt = fmt_f32(b); ++ c_fmt = fmt_f32(c); ++ r_fmt = fmt_f32(r); ++ flag_fmt = fmt_flags(); ++ ++ printf("%s * %s + %s = %s (%s)\n", ++ a_fmt, b_fmt, c_fmt, r_fmt, flag_fmt); ++ ++ free(a_fmt); ++ free(b_fmt); ++ free(c_fmt); ++ free(r_fmt); ++ free(flag_fmt); ++} ++ ++ ++int main(int argc, char *argv[argc]) ++{ ++ int i, j, k, l; ++ float a, b, c, r; ++ ++ for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { ++ fesetround(round_flags[i].flag); ++ printf("### Rounding %s\n", round_flags[i].desc); ++ for (j = 0; j < num_f32; j += 3) { ++ for (k = 1; k < num_f32; k += 3 ) { ++ for (l = 2; l < num_f32; l += 3) { ++ a = get_f32(j); ++ b = get_f32(k); ++ c = get_f32(l); ++ feclearexcept(FE_ALL_EXCEPT); ++ ++ /* must be built with -O2 to generate fused op */ ++ r = a * b + c; ++ ++ print_result(a, b, c, r); ++ } ++ } ++ } ++ } ++ ++ return 0; ++} +-- +2.20.1 + + + +Dave ran the above testcase on: + +> processor : 0 +> cpu : POWER8E (raw), altivec supported +> clock : 3325.000000MHz +> revision : 2.1 (pvr 004b 0201) + +And there are no diffs with what you currently get from master. So I think the invalid flag fault is fixed by a previous commit and the potential precision faults don't get picked up by the test case. I guess we could be a bit smarted about testing the limits. 
+
+On 9/13/19 8:49 AM, Alex Bennée wrote:
+> +static float f32_numbers[] = {
+> +    -SNANF,
+> +    -NAN,
+> +    -INFINITY,
+> +    -FLT_MAX,
+> +    -1.111E+31,
+> +    -1.111E+30,
+> +    -1.08700982e-12,
+> +    -1.78051176e-20,
+> +    -FLT_MIN,
+> +    0.0,
+> +    FLT_MIN,
+> +    2.98023224e-08,
+> +    5.96046E-8, /* min positive FP16 subnormal */
+> +    6.09756E-5, /* max subnormal FP16 */
+> +    6.10352E-5, /* min positive normal FP16 */
+> +    1.0,
+> +    1.0009765625, /* smallest float after 1.0 FP16 */
+> +    2.0,
+> +    M_E, M_PI,
+> +    65503.0,
+> +    65504.0, /* max FP16 */
+> +    65505.0,
+> +    131007.0,
+> +    131008.0, /* max AFP */
+> +    131009.0,
+> +    1.111E+30,
+> +    FLT_MAX,
+> +    INFINITY,
+> +    NAN,
+> +    SNANF
+> +};
+
+I've noticed that Glibc prefers to use hex representation for float values, to ensure an accurate representation. If you care to do so, here they are:
+static float f32_numbers[] = {
+    -SNANF,
+    -NAN,
+    -INFINITY,
+    -FLT_MAX,
+    -0x1.1874b2p+103,
+    -0x1.c0bab6p+99,
+    -0x1.31f75p-40,
+    -0x1.505444p-66,
+    -FLT_MIN,
+    0.0,
+    FLT_MIN,
+    0x1p-25,
+    0x1.ffffe6p-25, /* min positive FP16 subnormal */
+    0x1.ff801ap-15, /* max subnormal FP16 */
+    0x1.00000cp-14, /* min positive normal FP16 */
+    1.0,
+    0x1.004p+0, /* smallest float after 1.0 FP16 */
+    2.0,
+    M_E, M_PI,
+    0x1.ffbep+15,
+    0x1.ffcp+15, /* max FP16 */
+    0x1.ffc2p+15,
+    0x1.ffbfp+16,
+    0x1.ffcp+16, /* max AFP */
+    0x1.ffc1p+16,
+    0x1.c0bab6p+99,
+    FLT_MAX,
+    INFINITY,
+    NAN,
+    SNANF
+};
+
+PC
+
+
+
+Richard Henderson <email address hidden> writes:
+
+> On 9/13/19 9:49 AM, Alex Bennée wrote:
+>> +                    /* must be built with -O2 to generate fused op */
+>> +                    r = a * b + c;
+>
+> I would prefer to use fmaf() or __builtin_fmaf() here.
+>
+> Although you'll need to link with -lm just in case the
+> target doesn't support an instruction for fmaf and so
+> the builtin expands to the standard library call.
+
+I can do that - we have other tests that link to libm.
+
+I was expecting to see more breakage but the ppc64 tests all passed (or
+at least against the power8 David ran it on). What am I missing to hit
+the cases you know are broken?
+
+I've also experimented with reducing the number of iterations because if
+we want to have golden references we probably don't want to dump several
+hundred kilobytes of "golden" references into the source tree.
+
+> I also like Paul's suggestion to use hex float constants.
+
+Hmm I guess - they look a bit weird but I guess that's lack of familiarity.
+Is it still normalised? I guess the frac shows up (without the implicit
+bit).
+
+>
+>
+> r~
+
+
+--
+Alex Bennée
+
+
+[Expired for QEMU because there has been no activity for 60 days.]
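On the normalisation question just above: yes -- for normal values, printf's "%a" (as glibc
formats it, which is where Paul's constants come from) prints a significand in [1, 2) with the
leading 1 made explicit, so the hex fraction is the stored mantissa bits with trailing zeros
trimmed. A quick, hedged check (not part of the thread's patch; expected output assumes
glibc's "%a" formatting):

    /* hexfloat_check.c: print a few of the constants from the lists above
     * in hex-float form.
     */
    #include <stdio.h>
    #include <float.h>

    int main(void)
    {
        printf("%a\n", 65504.0f);       /* max FP16          -> 0x1.ffcp+15 */
        printf("%a\n", 1.0009765625f);  /* 1.0 + 2^-10       -> 0x1.004p+0  */
        printf("%a\n", FLT_MIN);        /* min normal float  -> 0x1p-126    */
        printf("%a\n", 0x1p-149f);      /* min float subnormal; promoted to
                                           double for printf -> 0x1p-149    */
        return 0;
    }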