summaryrefslogtreecommitdiffstats
path: root/results/classifier/zero-shot/108/debug/2595
diff options
context:
space:
mode:
authorChristian Krinitsin <mail@krinitsin.com>2025-07-03 19:39:53 +0200
committerChristian Krinitsin <mail@krinitsin.com>2025-07-03 19:39:53 +0200
commitdee4dcba78baf712cab403d47d9db319ab7f95d6 (patch)
tree418478faf06786701a56268672f73d6b0b4eb239 /results/classifier/zero-shot/108/debug/2595
parent4d9e26c0333abd39bdbd039dcdb30ed429c475ba (diff)
downloademulator-bug-study-dee4dcba78baf712cab403d47d9db319ab7f95d6.tar.gz
emulator-bug-study-dee4dcba78baf712cab403d47d9db319ab7f95d6.zip
restructure results
Diffstat (limited to 'results/classifier/zero-shot/108/debug/2595')
-rw-r--r--results/classifier/zero-shot/108/debug/2595150
1 files changed, 150 insertions, 0 deletions
diff --git a/results/classifier/zero-shot/108/debug/2595 b/results/classifier/zero-shot/108/debug/2595
new file mode 100644
index 00000000..2fd9a8a6
--- /dev/null
+++ b/results/classifier/zero-shot/108/debug/2595
@@ -0,0 +1,150 @@
+debug: 0.959
+graphic: 0.959
+other: 0.957
+semantic: 0.951
+performance: 0.951
+device: 0.919
+PID: 0.908
+permissions: 0.906
+socket: 0.901
+boot: 0.896
+vnc: 0.886
+files: 0.871
+network: 0.861
+KVM: 0.850
+
+Incorrect behavior with 64-bit element SDOT and UDOT instructions on ARM SVE when sve-default-vector-length>=64
+Description of problem:
+The behavior of SDOT and UDOT instructions are incorrect when the Zresult.D register is used, which is the 64-bit svdot_lane\_{s,u}64 intrinsic in ACLE.
+
+I have tested the same code using [Arm Instruction Emulator](https://developer.arm.com/Tools%20and%20Software/Arm%20Instruction%20Emulator) (which is deprecated though) and gem5 which produced correct result, I believe that the SDOT and UDOT implementation in qemu is incorrect.
+Steps to reproduce:
+1. Get Arm Gnu toolchain from [Arm GNU Toolchain Downloads – Arm Developer](https://developer.arm.com/downloads/-/arm-gnu-toolchain-downloads), for x86 Linux hosts, download arm-gnu-toolchain-13.3.rel1-x86_64-aarch64-none-linux-gnu.tar.xz and extract it. Alternatively, use any compiler that is able to cross compile for armv8.2-a+sve targets.
+2. Compile the following program with these compiler arguments
+
+ ```
+ arm-gnu-toolchain-13.3.rel1-x86_64-aarch64-none-linux-gnu/bin/aarch64-none-linux-gnu-gcc -O3 -march=armv8.2-a+sve dot_lane.c -o dot_lane
+ ```
+
+ ```c
+ #include <stdio.h>
+ #include <arm_sve.h>
+
+ int64_t a[32] = { 0 };
+ int16_t b[128];
+ int16_t c[128];
+ int64_t r[32];
+ int64_t expected_r[32];
+
+ #define IMM 0
+
+ int main(void)
+ {
+ for (size_t i = 0; i < 128; i++) {
+ b[i] = 1;
+ c[i] = i / 4;
+ }
+
+ svint64_t av = svld1(svptrue_b64(), a);
+ svint16_t bv = svld1(svptrue_b16(), b);
+ svint16_t cv = svld1(svptrue_b16(), c);
+
+ svint64_t result = svdot_lane_s64(av, bv, cv, IMM);
+
+ svst1(svptrue_b64(), r, result);
+
+ for (size_t i = 0; i < svcntd(); i++) {
+ expected_r[i] =
+ (int64_t)b[i * 4 + 0] * (int64_t)c[(i - i % 2) * 4 + IMM * 4 + 0] +
+ (int64_t)b[i * 4 + 1] * (int64_t)c[(i - i % 2) * 4 + IMM * 4 + 1] +
+ (int64_t)b[i * 4 + 2] * (int64_t)c[(i - i % 2) * 4 + IMM * 4 + 2] +
+ (int64_t)b[i * 4 + 3] * (int64_t)c[(i - i % 2) * 4 + IMM * 4 + 3] +
+ a[i];
+ }
+
+ printf("%12s", "r: ");
+ for (size_t i = 0; i < svcntd(); i++) {
+ printf("%4ld", r[i]);
+ }
+ printf("\n");
+ printf("%12s", "expected_r: ");
+ for (size_t i = 0; i < svcntd(); i++) {
+ printf("%4ld", expected_r[i]);
+ }
+ printf("\n\t\t");
+ for (size_t i = 0; i < svcntd(); i++) {
+ if (r[i] != expected_r[i]) {
+ printf("%4c", '^');
+ } else {
+ printf("%4c", ' ');
+ }
+ }
+ printf("\n");
+ printf("idx:\t\t");
+ for (size_t i = 0; i < svcntd(); i++) {
+ if (r[i] != expected_r[i]) {
+ printf("%4d", i);
+ } else {
+ printf("%4c", ' ');
+ }
+ }
+ printf("\n");
+
+ return 0;
+ }
+ ```
+3. Execute it with the following commands:
+
+ ```
+ qemu-aarch64 -cpu max,sve-default-vector-length=16 -L arm-gnu-toolchain-13.3.rel1-x86_64-aarch64-none-linux-gnu/bin/../aarch64-none-linux-gnu/libc dot_lane
+ ```
+
+ Change the value of `sve-default-vector-length` to 32, 64, 128, 256 and observe the outputs, we should see that for `sve-default-vector-length` \>= 64, the result is incorrect.
+
+ `sve-default-vector-length=16`
+
+ ```
+ r: 0 0
+ expected_r: 0 0
+
+ idx:
+ ```
+
+ `sve-default-vector-length=32`
+
+ ```
+ r: 0 0 8 8
+ expected_r: 0 0 8 8
+
+ idx:
+ ```
+
+ `sve-default-vector-length=64`
+
+ ```
+ r: 0 0 8 8 8 8 24 24
+ expected_r: 0 0 8 8 16 16 24 24
+ ^ ^
+ idx: 4 5
+ ```
+
+ `sve-default-vector-length=128`
+
+ ```
+ r: 0 0 8 8 8 8 24 24 24 24 40 40 40 40 56 56
+ expected_r: 0 0 8 8 16 16 24 24 32 32 40 40 48 48 56 56
+ ^ ^ ^ ^ ^ ^
+ idx: 4 5 8 9 12 13
+ ```
+
+ `sve-default-vector-length=256`
+
+ ```
+ r: 0 0 8 8 8 8 24 24 24 24 40 40 40 40 56 56 56 56 72 72 72 72 88 88 88 88 104 104 104 104 120 120
+ expected_r: 0 0 8 8 16 16 24 24 32 32 40 40 48 48 56 56 64 64 72 72 80 80 88 88 96 96 104 104 112 112 120 120
+ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ idx: 4 5 8 9 12 13 16 17 20 21 24 25 28 29
+ ```
+4. By passing `-S` to the compiler, we can see that sdot (or udot if using `svdot_lane_u64()`) is produced in assembly (`sdot z0.d, z1.h, z2.h[0]`), which is correct behavior according to [Intrinsics – Arm Developer](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot_lane%5B_s64%5D).
+Additional information:
+