instruction: 0.968 graphic: 0.919 device: 0.848 assembly: 0.842 socket: 0.826 vnc: 0.774 network: 0.701 semantic: 0.687 boot: 0.686 mistranslation: 0.642 other: 0.581 KVM: 0.346

qemu-aarch64: Incorrect result for ssra instruction when using vector lengths of 1024-bit or higher.

Description of problem:

```
#include <arm_sve.h>
#include <stdio.h>

#define SZ 32

int main(int argc, char* argv[]) {
  svbool_t pg = svptrue_b64();
  uint64_t VL = svcntd();

  fprintf(stderr, "One SVE vector can hold %li uint64_ts\n", VL);

  int64_t sr[SZ], sx[SZ], sy[SZ];
  uint64_t ur[SZ], ux[SZ], uy[SZ];

  for (uint64_t i = 0; i < SZ; ++i) {
    sx[i] = ux[i] = 0;
    sy[i] = uy[i] = 1024;
  }

  for (uint64_t i = 0; i < SZ; i+=VL) {
    fprintf(stderr, "Processing elements %li - %li\n", i, i + VL - 1);

    svint64_t SX = svld1(pg, sx + i);
    svint64_t SY = svld1(pg, sy + i);
    svint64_t SR = svsra(SX, SY, 4);
    svst1(pg, sr + i, SR);

    svuint64_t UX = svld1(pg, ux + i);
    svuint64_t UY = svld1(pg, uy + i);
    svuint64_t UR = svsra(UX, UY, 4);
    svst1(pg, ur + i, UR);
  }

  for (uint64_t i = 0; i < SZ; ++i) {
    fprintf(stderr, "sr[%li]=%li, ur[%li]\n", i, sr[i], ur[i]);
  }

  return 0;
}
```

Steps to reproduce:

1. Build the above C source using "gcc -march=armv9-a -O1 ssra.c", can also use clang.
2. Run with "qemu-aarch64 -cpu max,sve-default-vector-length=64 ./a.out" and you'll see the expected result of 64 (signed and unsigned)
3. Run with "qemu-aarch64 -cpu max,sve-default-vector-length=128 ./a.out" and you'll see the expected result of 64 for unsigned but the signed result is 0. This suggests the emulation of SVE2 ssra instruction is incorrect for this and bigger vector lengths.

Additional information: