summaryrefslogtreecommitdiffstats
path: root/results/classifier/118/none/1620
diff options
context:
space:
mode:
authorChristian Krinitsin <mail@krinitsin.com>2025-06-16 16:59:00 +0000
committerChristian Krinitsin <mail@krinitsin.com>2025-06-16 16:59:33 +0000
commit9aba81d8eb048db908c94a3c40c25a5fde0caee6 (patch)
treeb765e7fb5e9a3c2143c68b0414e0055adb70e785 /results/classifier/118/none/1620
parentb89a938452613061c0f1f23e710281cf5c83cb29 (diff)
downloademulator-bug-study-9aba81d8eb048db908c94a3c40c25a5fde0caee6.tar.gz
emulator-bug-study-9aba81d8eb048db908c94a3c40c25a5fde0caee6.zip
add 18th iteration of classifier
Diffstat (limited to 'results/classifier/118/none/1620')
-rw-r--r--results/classifier/118/none/1620124
1 files changed, 124 insertions, 0 deletions
diff --git a/results/classifier/118/none/1620 b/results/classifier/118/none/1620
new file mode 100644
index 00000000..02d29cc0
--- /dev/null
+++ b/results/classifier/118/none/1620
@@ -0,0 +1,124 @@
+assembly: 0.775
+permissions: 0.759
+user-level: 0.730
+architecture: 0.729
+device: 0.720
+debug: 0.718
+KVM: 0.715
+performance: 0.712
+mistranslation: 0.702
+peripherals: 0.701
+hypervisor: 0.699
+files: 0.696
+register: 0.695
+arm: 0.689
+semantic: 0.688
+risc-v: 0.685
+VMM: 0.681
+virtual: 0.678
+PID: 0.674
+graphic: 0.672
+ppc: 0.661
+TCG: 0.654
+kernel: 0.653
+vnc: 0.639
+socket: 0.616
+boot: 0.561
+network: 0.554
+x86: 0.531
+i386: 0.505
+
+SME FMOPA outer product instruction gives incorrect result
+Description of problem:
+The SME outer product instructions operate on tiles of elements. In the
+below example we are performing an outer product of a vector of 1.0
+by itself. This naturally should produce a matrix filled with 1.0
+values, however if we read the values of the tile and printf them we
+instead observe 0.0 values.
+
+Without digging into the underlying QEMU code this appears to be a bug
+in how elements are set based on the tile number, since the same code
+using za0.s rather than za1.s correctly reports all 1.0 values as output
+as expected.
+
+main.c
+```
+#include <stdio.h>
+
+void foo(float *dst);
+
+int main() {
+ float dst[16];
+ foo(dst);
+
+ // This should print:
+ // >>> 1.000000 1.000000 1.000000 1.000000
+ // >>> 1.000000 1.000000 1.000000 1.000000
+ // >>> 1.000000 1.000000 1.000000 1.000000
+ // >>> 1.000000 1.000000 1.000000 1.000000
+ for (int i=0; i<4; ++i) {
+ printf(">>> ");
+ for (int j=0; j<4; ++j) {
+ printf("%lf ", (double)dst[i * 4 + j]);
+ }
+ printf("\n");
+ }
+}
+```
+
+foo.S
+```
+.global foo
+foo:
+ stp x29, x30, [sp, -80]!
+ mov x29, sp
+ stp d8, d9, [sp, 16]
+ stp d10, d11, [sp, 32]
+ stp d12, d13, [sp, 48]
+ stp d14, d15, [sp, 64]
+
+ smstart
+
+ ptrue p0.s, vl4
+ fmov z0.s, #1.0
+
+ // An outer product of a vector of 1.0 by itself should be a matrix of 1.0.
+ // Note that we are using tile 1 here (za1.s) rather than tile 0.
+ zero {za}
+ fmopa za1.s, p0/m, p0/m, z0.s, z0.s
+
+ // Read the first 4x4 sub-matrix of elements from tile 1:
+ // Note that za1h should be interchangable here.
+ mov w12, #0
+ mova z0.s, p0/m, za1v.s[w12, #0]
+ mova z1.s, p0/m, za1v.s[w12, #1]
+ mova z2.s, p0/m, za1v.s[w12, #2]
+ mova z3.s, p0/m, za1v.s[w12, #3]
+
+ // And store them to the input pointer (dst in the C code):
+ st1w {z0.s}, p0, [x0]
+ add x0, x0, #16
+ st1w {z1.s}, p0, [x0]
+ add x0, x0, #16
+ st1w {z2.s}, p0, [x0]
+ add x0, x0, #16
+ st1w {z3.s}, p0, [x0]
+
+ smstop
+
+ ldp d8, d9, [sp, 16]
+ ldp d10, d11, [sp, 32]
+ ldp d12, d13, [sp, 48]
+ ldp d14, d15, [sp, 64]
+ ldp x29, x30, [sp], 80
+ ret
+```
+Steps to reproduce:
+```
+$ clang -target aarch64-linux-gnu -march=armv9-a+sme test.c -O1 -static
+$ ~/qemu/build/qemu-aarch64 ./a.out
+>>> 0.000000 0.000000 0.000000 0.000000
+>>> 0.000000 0.000000 0.000000 0.000000
+>>> 0.000000 0.000000 0.000000 0.000000
+>>> 0.000000 0.000000 0.000000 0.000000
+```