results/classifier/zero-shot/108/graphic/2376


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

graphic: 0.950
debug: 0.914
PID: 0.873
performance: 0.872
device: 0.870
other: 0.869
semantic: 0.861
files: 0.850
permissions: 0.845
vnc: 0.842
boot: 0.827
network: 0.820
socket: 0.798
KVM: 0.760

A bug in ARM VCMLA.f16/VCMLA.f32 instructions
Description of problem:
The vcmla instruction performs complex-number operations on the vector registers. There is a bug in which this instruction modifies the contents of an unrelated vector register.

The cause is a simple out-of-bounds access; the helper functions should correctly limit the number of elements they modify:
```
// target/arm/tcg/vec_helper.c
void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
                             void *vfpst, uint32_t desc)
{
    uintptr_t opr_sz = simd_oprsz(desc);
    float16 *d = vd, *n = vn, *m = vm, *a = va;
    float_status *fpst = vfpst;
    intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
    uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
    intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
    uint32_t neg_real = flip ^ neg_imag;
    intptr_t elements = opr_sz / sizeof(float16);
    intptr_t eltspersegment = 16 / sizeof(float16); // This should be fixed;
    intptr_t i, j;

    ...
}

...

void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
                             void *vfpst, uint32_t desc)
{
    uintptr_t opr_sz = simd_oprsz(desc);
    float32 *d = vd, *n = vn, *m = vm, *a = va;
    float_status *fpst = vfpst;
    intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
    uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
    intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
    uint32_t neg_real = flip ^ neg_imag;
    intptr_t elements = opr_sz / sizeof(float32);
    intptr_t eltspersegment = 16 / sizeof(float32); // This should be fixed;
    intptr_t i, j;

    ...
}
```
Steps to reproduce:
1. Write `test.c`.
```
#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Test vectors for the reproducer. They are stored as uint8_t so every byte
// holds 0x00..0xff regardless of whether plain `char` is signed on the build
// target (0xff does not fit in a signed char).
// zero inputs should produce zero output
uint8_t i_D4[8] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };   // loaded into d4
uint8_t i_D8[8] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };   // loaded into d8
uint8_t i_D30[8] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };  // loaded into d30
uint8_t i_D31[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; // this should never be touched
uint8_t o_D30[8];  // receives d30 after the instruction under test
uint8_t o_D31[8];  // receives d31 after the instruction under test

// Print `count` bytes starting at `bytes` as space-separated two-digit hex
// values, prefixed by "<label>: " and terminated by a newline.
// The bytes are read as unsigned char so that values >= 0x80 are never
// sign-extended (which would print e.g. "ffffffff" instead of "ff").
static void print_reg_bytes(const char *label, const void *bytes, size_t count) {
    const unsigned char *p = bytes;

    printf("%s: ", label);
    for (size_t i = 0; i < count; i++) {
        printf("%02x ", (unsigned)p[i]);
    }
    printf("\n");
}

// Dump the captured contents of D30 and D31 (the o_D30 / o_D31 buffers),
// one register per line.
void __attribute__ ((noinline)) show_state(void) {
    print_reg_bytes("D30", o_D30, sizeof o_D30);
    print_reg_bytes("D31", o_D31, sizeof o_D31);
}

// Execute the instruction under test and capture its result.
//
// Sequence:
//   1. Load d4, d8, d30 and d31 from i_D4, i_D8, i_D30 and i_D31.
//   2. Branch to Thumb state and execute the raw encoding 0xfed8e804
//      (vcmla.f32 d30, d8, d4[0], #90).
//   3. Branch back to ARM state and store d30 / d31 to o_D30 / o_D31.
//
// r7 is used as the scratch address register throughout. The clobber list
// tells the compiler which registers and memory the sequence overwrites;
// without it the caller's r7 and d8 would be silently corrupted.
// NOTE(review): naming r7 as a clobber assumes it is not the frame pointer,
// i.e. the -marm build from the reproduction steps — confirm if building
// with other options.
void __attribute__ ((noinline)) run() {
    __asm__ volatile (
        // Load the input registers.
        "movw r7, #:lower16:i_D4\n"
        "movt r7, #:upper16:i_D4\n"
        "vldr d4, [r7]\n"
        "movw r7, #:lower16:i_D8\n"
        "movt r7, #:upper16:i_D8\n"
        "vldr d8, [r7]\n"
        "movw r7, #:lower16:i_D30\n"
        "movt r7, #:upper16:i_D30\n"
        "vldr d30, [r7]\n"
        "movw r7, #:lower16:i_D31\n"
        "movt r7, #:upper16:i_D31\n"
        "vldr d31, [r7]\n"
        // Switch to Thumb state (bit 0 of the branch target set).
        "adr r7, Lbl_thumb + 1\n"
        "bx r7\n"
        ".thumb\n"
        "Lbl_thumb:\n"
        ".inst 0xfed8e804\n" // vcmla.f32       d30, d8, d4[0], #90
        // Switch back to ARM state.
        "adr r7, Lbl_arm\n"
        "bx r7\n"
        ".arm\n"
        "Lbl_arm:\n"
        // Store the registers to be inspected.
        "movw r7, #:lower16:o_D30\n"
        "movt r7, #:upper16:o_D30\n"
        "vstr d30, [r7]\n"
        "movw r7, #:lower16:o_D31\n"
        "movt r7, #:upper16:o_D31\n"
        "vstr d31, [r7]\n"
        : /* no outputs */
        : /* no inputs */
        : "r7", "d4", "d8", "d30", "d31", "memory"
    );
}

// Entry point: execute the instruction under test, then print the captured
// D30 / D31 contents. Command-line arguments are not used.
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    run();
    show_state();

    return 0;
}
```
2. Compile `test.bin` using this command: `arm-linux-gnueabihf-gcc-12 -O2 -no-pie -marm -march=armv7-a+vfpv4 ./test.c -o ./test.bin`.
3. Run QEMU using this command: `qemu-arm -L /usr/arm-linux-gnueabihf/ ./test.bin`.
4. When run on the buggy QEMU, the program prints the value of D31 as `00 00 c0 7f 00 00 c0 7f`. It should print `ff ff ff ff ff ff ff ff` once the bug is fixed.
Additional information: