diff options
Diffstat (limited to 'results/classifier/105/other/1916112')
| -rw-r--r-- | results/classifier/105/other/1916112 | 172 |
1 files changed, 172 insertions, 0 deletions
diff --git a/results/classifier/105/other/1916112 b/results/classifier/105/other/1916112 new file mode 100644 index 00000000..3bfe9494 --- /dev/null +++ b/results/classifier/105/other/1916112 @@ -0,0 +1,172 @@ +other: 0.865 +instruction: 0.846 +boot: 0.796 +device: 0.733 +graphic: 0.724 +assembly: 0.690 +semantic: 0.666 +mistranslation: 0.641 +vnc: 0.628 +network: 0.623 +KVM: 0.601 +socket: 0.592 + +Illegal instruction crash of QEMU on Jetson Nano + +I have a jetson nano (arm64 SBC) and I want to check the native emulation performance of Raspbian Buster. I used the info available here: + +https://github.com/dhruvvyas90/qemu-rpi-kernel/tree/master/native-emuation + +I have Xubuntut 20.04 with KVM enabled kernel running on the Jetson Nano + +However QEMU crashes with "Illegal Instruction" during kernel boot. I have a built latest QEMU from sources with following configuration + +./configure --prefix=/usr/local --target-list=aarch64-softmmu,arm-softmmu --enable-guest-agent --enable-vnc --enable-vnc-jpeg --enable-vnc-png --enable-kvm --enable-spice --enable-sdl --enable-gtk --enable-virglrenderer --enable-opengl + +qemu-system-aarch64 --version +QEMU emulator version 5.2.50 (v5.2.0-1731-g5b19cb63d9) + +When I run as follows: + +../build/qemu-system-aarch64 -M raspi3 +-append "rw earlyprintk loglevel=8 console=ttyAMA0,115200 dwc_otg.lpm_enable=0 root=/dev/mmcblk0p2 rootdelay=1" +-dtb ./bcm2710-rpi-3-b-plus.dtb +-sd /media/96747D21747D0571/JetsonNano/2020-08-20-raspios-buster-armhf-full.qcow2 +-kernel ./kernel8.img +-m 1G -smp 4 -serial stdio -usb -device usb-mouse -device usb-kbd + +I get : +[ 74.994834] systemd[1]: Condition check resulted in FUSE Control File System being skipped. +[ 76.281274] systemd[1]: Starting Apply Kernel Variables... +Starting Apply Kernel Variables... +Illegal instruction (core dumped) + +When I use GDB I see this: + +Thread 8 "qemu-system-aar" received signal SIGILL, Illegal instruction. +[Switching to Thread 0x7fad7f9ba0 (LWP 28037)] +0x0000007f888ac690 in code_gen_buffer () +(gdb) bt +#0 0x0000007f888ac690 in code_gen_buffer () +#1 0x0000005555d7c038 in cpu_tb_exec (tb_exit=, itb=, cpu=0x7fb4502c40) +at ../accel/tcg/cpu-exec.c:191 +#2 cpu_loop_exec_tb (tb_exit=, last_tb=, tb=, cpu=0x7fb4502c40) +at ../accel/tcg/cpu-exec.c:708 +#3 cpu_exec (cpu=cpu@entry=0x7fb4502c40) at ../accel/tcg/cpu-exec.c:819 +.. + +I have just two questions: + +Is this a problem with QEMU or is there anything specific build or options I need to use. Any specific version of QEMU should be used ? + +Why is TCG used as the accelerator when KVM is present. Is it possible and how to use KVM ? + +If I enabled the KVM then I get this error: + +../build/qemu-system-aarch64 -M raspi3 -enable-kvm -append "rw earlyprintk loglevel=8 console=ttyAMA0,115200 dwc_otg.lpm_enable=0 root=/dev/mmcblk0p2 rootdelay=1" -dtb ./bcm2710-rpi-3-b-plus.dtb -sd /media/96747D21747D0571/JetsonNano/2020-08-20-raspios-buster-armhf-full.qcow2 -kernel ./kernel8.img -m 1G -smp 4 -serial stdio -usb -device usb-mouse -device usb-kbd +WARNING: Image format was not specified for '/media/96747D21747D0571/JetsonNano/2020-08-20-raspios-buster-armhf-full.img' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. + Specify the 'raw' format explicitly to remove the restrictions. +qemu-system-aarch64: ../softmmu/physmem.c:750: cpu_address_space_init: Assertion `asidx == 0 || !kvm_enabled()' failed. + +Thanks a lot. + +Can you use gdb to display what the instruction that provoked the SIGILL is ? ("disas $pc-32,$pc+32" or similar should do it). + +Re "Why is TCG used as the accelerator when KVM is present?", the answer is that only certain board types and CPU types work with KVM. The simple answer is "only the 'virt' board works with KVM". Other boards generally use a CPU type or CPU features which KVM does not support and so TCG is the only choice. It's a QEMU bug that we assert() rather than printing a helpful error message (which we will probably fix for 6.0.) + + +Disassembly: + +[ OK ] Mounted RPC Pipe File System. +[ 75.916706] systemd[1]: Started Create list of required static device nodes for the current kernel. +[ OK ] Started Create list of req… nodes for the current kernel. + +Thread 7 "qemu-system-aar" received signal SIGILL, Illegal instruction. +[Switching to Thread 0x7fade0aba0 (LWP 7777)] +0x0000007f8aca04d0 in code_gen_buffer () +(gdb) disas $pc-32,$pc+32 +Dump of assembler code from 0x7f8aca04b0 to 0x7f8aca04f0: + 0x0000007f8aca04b0 <code_gen_buffer+162653284>: cmp x0, x3 + 0x0000007f8aca04b4 <code_gen_buffer+162653288>: b.ne 0x7f8aca0908 <code_gen_buffer+162654396> // b.any + 0x0000007f8aca04b8 <code_gen_buffer+162653292>: ldr x23, [x1, x23] + 0x0000007f8aca04bc <code_gen_buffer+162653296>: str x23, [x19, #3688] + 0x0000007f8aca04c0 <code_gen_buffer+162653300>: add w22, w22, w21 + 0x0000007f8aca04c4 <code_gen_buffer+162653304>: str w22, [x19, #16] + 0x0000007f8aca04c8 <code_gen_buffer+162653308>: ldr d0, [x19, #3944] + 0x0000007f8aca04cc <code_gen_buffer+162653312>: ldr d1, [x19, #4192] +=> 0x0000007f8aca04d0 <code_gen_buffer+162653316>: .inst 0x2ee0b822 ; undefined + 0x0000007f8aca04d4 <code_gen_buffer+162653320>: movi d3, #0xff + 0x0000007f8aca04d8 <code_gen_buffer+162653324>: and v1.8b, v1.8b, v3.8b + 0x0000007f8aca04dc <code_gen_buffer+162653328>: and v2.8b, v2.8b, v3.8b + 0x0000007f8aca04e0 <code_gen_buffer+162653332>: .inst 0x2ee14404 ; undefined + 0x0000007f8aca04e4 <code_gen_buffer+162653336>: .inst 0x2ee0b845 ; und--Typ--Ty--Ty-----Ty--T--Type--Type-----Ty--T--Type <RET> for more, q to quit, c to continue without paging-- +efined + 0x0000007f8aca04e8 <code_gen_buffer+162653340>: .inst 0x2ee54400 ; undefined + 0x0000007f8aca04ec <code_gen_buffer+162653344>: ldr d5, 0x7f8aca09f0 <code_gen_buffer+162654628> +End of assembler dump. + + +I can confirm the issue (tested with Jetson Nano and Xavier running Ubuntu bionic). + +Linux starts booting, shows "Welcome to Raspbian GNU/Linux 10 (buster)!" after 33 s on Nano, but QEMU crashes after showing "Mounted Kernel Debug File System" about 77 s after start. + +Debugging is difficult because the failing address in code_gen_buffer changes for each new run, so setting a hardware watchpoint does not work. The disassembly is always the same, only the address of the code changes. + +The same test works better when I use the TCG interpreter (./configure [...] --enable-tcg-interpreter) although a warning says that TCI has less reliability and that it is strongly recommended to remove the --enable-tcg-interpreter configuration option. It reaches "Welcome" after 205 s on Xavier, "Mounted" after 420 s and finally goes into emergency mode because of "Dependency failed for /boot". Linux shows "Press Enter to continue.", but I don't get a command line after doing so. + + +TCG works and I get a Linux boot prompt in the guest Raspbian when vector instructions for TCG are disabled, so obviously the undefined instruction is simply unsupported for Jetson Nano and Xavier. + +Patch used to disable it: + +diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h +index 5ec30dba25..2240adad1e 100644 +--- a/tcg/aarch64/tcg-target.h ++++ b/tcg/aarch64/tcg-target.h +@@ -125,8 +125,8 @@ typedef enum { + #define TCG_TARGET_HAS_mulsh_i64 1 + #define TCG_TARGET_HAS_direct_jump 1 + +-#define TCG_TARGET_HAS_v64 1 +-#define TCG_TARGET_HAS_v128 1 ++#define TCG_TARGET_HAS_v64 0 ++#define TCG_TARGET_HAS_v128 0 + #define TCG_TARGET_HAS_v256 0 + + #define TCG_TARGET_HAS_andc_vec 1 + + +This has nothing to do with the Jetson Nano, which +uses a very standard Arm cortex-a57. + +I can reproduce this on any arm64 host. + +The sigill is for the code generated for the aa32 instruction + +0xf7ca0820: f3780407 vshl.u64 d16, d7, d8 + + ---- 00000000f7ca0820 0000000000000000 0000000000000000 + ld_vec v64,e8,tmp9,env,$0xf68 pref=0xffffffff00000000 + ld_vec v64,e8,tmp10,env,$0x1060 pref=0xffffffff00000000 + neg_vec v64,e64,tmp15,tmp10 pref=0xffffffff00000000 + ... + + -- guest addr 0x00000000f7ca0820 +0xffff2a790d88: fd47b660 ldr d0, [x19, #0xf68] +0xffff2a790d8c: fd483261 ldr d1, [x19, #0x1060] +0xffff2a790d90: 2ee0b822 .byte 0x22, 0xb8, 0xe0, 0x2e + +The illegal instruction is attempting neg (vector) with v1.1d, +but that runs afoul of the isa constraint + + if size:Q == '110' then UNDEFINED; + +We should have used neg (scalar) instead. + +I can replicate the sigill with RISU. + +Now fixed in master, commit d81bad24dfea6ec0 + +Working well now. Thank you. + |