diff options
Diffstat (limited to 'results/classifier/016/user-level/23448582')
| -rw-r--r-- | results/classifier/016/user-level/23448582 | 292 |
1 files changed, 292 insertions, 0 deletions
diff --git a/results/classifier/016/user-level/23448582 b/results/classifier/016/user-level/23448582 new file mode 100644 index 00000000..32096f4f --- /dev/null +++ b/results/classifier/016/user-level/23448582 @@ -0,0 +1,292 @@ +user-level: 0.896 +debug: 0.851 +virtual: 0.759 +kernel: 0.639 +operating system: 0.498 +TCG: 0.132 +x86: 0.132 +device: 0.126 +PID: 0.104 +ppc: 0.071 +hypervisor: 0.058 +VMM: 0.057 +risc-v: 0.041 +files: 0.031 +register: 0.028 +performance: 0.025 +socket: 0.020 +i386: 0.016 +arm: 0.013 +assembly: 0.010 +vnc: 0.009 +architecture: 0.007 +peripherals: 0.006 +semantic: 0.005 +network: 0.005 +KVM: 0.004 +alpha: 0.003 +boot: 0.003 +graphic: 0.003 +permissions: 0.003 +mistranslation: 0.001 + +[BUG REPORT] cxl process in infinity loop + +Hi, all + +When I did the cxl memory hot-plug test on QEMU, I accidentally connected +two memdev to the same downstream port, the command like below: + +> +-object memory-backend-ram,size=262144k,share=on,id=vmem0 \ +> +-object memory-backend-ram,size=262144k,share=on,id=vmem1 \ +> +-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ +> +-device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ +> +-device cxl-upstream,bus=root_port0,id=us0 \ +> +-device cxl-downstream,port=0,bus=us0,id=swport00,chassis=0,slot=5 \ +> +-device cxl-downstream,port=0,bus=us0,id=swport01,chassis=0,slot=7 \ +same downstream port but has different slot! + +> +-device cxl-type3,bus=swport00,volatile-memdev=vmem0,id=cxl-vmem0 \ +> +-device cxl-type3,bus=swport01,volatile-memdev=vmem1,id=cxl-vmem1 \ +> +-M +> +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=64G,cxl-fmw.0.interleave-granularity=4k +> +\ +There is no error occurred when vm start, but when I executed the âcxl listâ +command to view +the CXL objects info, the process can not end properly. + +Then I used strace to trace the process, I found that the process is in +infinity loop: +# strace cxl list +...... +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +write(3, "1\n\0", 3) = 3 +close(3) = 0 +access("/run/udev/queue", F_OK) = 0 + +[Environment]: +linux: V6.10-rc3 +QEMU: V9.0.0 +ndctl: v79 + +I know this is because of the wrong use of the QEMU command, but I think we +should +be aware of this error in one of the QEMU, OS or ndctl side at least. + +Thanks +Xingtao + +On Tue, 2 Jul 2024 00:30:06 +0000 +"Xingtao Yao (Fujitsu)" <yaoxt.fnst@fujitsu.com> wrote: + +> +Hi, all +> +> +When I did the cxl memory hot-plug test on QEMU, I accidentally connected +> +two memdev to the same downstream port, the command like below: +> +> +> -object memory-backend-ram,size=262144k,share=on,id=vmem0 \ +> +> -object memory-backend-ram,size=262144k,share=on,id=vmem1 \ +> +> -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ +> +> -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ +> +> -device cxl-upstream,bus=root_port0,id=us0 \ +> +> -device cxl-downstream,port=0,bus=us0,id=swport00,chassis=0,slot=5 \ +> +> -device cxl-downstream,port=0,bus=us0,id=swport01,chassis=0,slot=7 \ +> +same downstream port but has different slot! +> +> +> -device cxl-type3,bus=swport00,volatile-memdev=vmem0,id=cxl-vmem0 \ +> +> -device cxl-type3,bus=swport01,volatile-memdev=vmem1,id=cxl-vmem1 \ +> +> -M +> +> cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=64G,cxl-fmw.0.interleave-granularity=4k +> +> \ +> +> +There is no error occurred when vm start, but when I executed the âcxl listâ +> +command to view +> +the CXL objects info, the process can not end properly. +I'd be happy to look preventing this on QEMU side if you send one, +but in general there are are lots of ways to shoot yourself in the +foot with CXL and PCI device emulation in QEMU so I'm not going +to rush to solve this specific one. + +Likewise, some hardening in kernel / userspace probably makes sense but +this is a non compliant switch so priority of a fix is probably fairly low. + +Jonathan + +> +> +Then I used strace to trace the process, I found that the process is in +> +infinity loop: +> +# strace cxl list +> +...... +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 +> +openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 +> +write(3, "1\n\0", 3) = 3 +> +close(3) = 0 +> +access("/run/udev/queue", F_OK) = 0 +> +> +[Environment]: +> +linux: V6.10-rc3 +> +QEMU: V9.0.0 +> +ndctl: v79 +> +> +I know this is because of the wrong use of the QEMU command, but I think we +> +should +> +be aware of this error in one of the QEMU, OS or ndctl side at least. +> +> +Thanks +> +Xingtao + |