diff options
Diffstat (limited to 'classification_output/02/other/23448582')
| -rw-r--r-- | classification_output/02/other/23448582 | 266 |
1 files changed, 0 insertions, 266 deletions
diff --git a/classification_output/02/other/23448582 b/classification_output/02/other/23448582 deleted file mode 100644 index 6d6939b4f..000000000 --- a/classification_output/02/other/23448582 +++ /dev/null @@ -1,266 +0,0 @@ -other: 0.990 -semantic: 0.987 -instruction: 0.983 -mistranslation: 0.982 -boot: 0.967 - -[BUG REPORT] cxl process in infinity loop - -Hi, all - -When I did the cxl memory hot-plug test on QEMU, I accidentally connected -two memdev to the same downstream port, the command like below: - -> --object memory-backend-ram,size=262144k,share=on,id=vmem0 \ -> --object memory-backend-ram,size=262144k,share=on,id=vmem1 \ -> --device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -> --device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ -> --device cxl-upstream,bus=root_port0,id=us0 \ -> --device cxl-downstream,port=0,bus=us0,id=swport00,chassis=0,slot=5 \ -> --device cxl-downstream,port=0,bus=us0,id=swport01,chassis=0,slot=7 \ -same downstream port but has different slot! - -> --device cxl-type3,bus=swport00,volatile-memdev=vmem0,id=cxl-vmem0 \ -> --device cxl-type3,bus=swport01,volatile-memdev=vmem1,id=cxl-vmem1 \ -> --M -> -cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=64G,cxl-fmw.0.interleave-granularity=4k -> -\ -There is no error occurred when vm start, but when I executed the âcxl listâ -command to view -the CXL objects info, the process can not end properly. - -Then I used strace to trace the process, I found that the process is in -infinity loop: -# strace cxl list -...... -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -write(3, "1\n\0", 3) = 3 -close(3) = 0 -access("/run/udev/queue", F_OK) = 0 - -[Environment]: -linux: V6.10-rc3 -QEMU: V9.0.0 -ndctl: v79 - -I know this is because of the wrong use of the QEMU command, but I think we -should -be aware of this error in one of the QEMU, OS or ndctl side at least. - -Thanks -Xingtao - -On Tue, 2 Jul 2024 00:30:06 +0000 -"Xingtao Yao (Fujitsu)" <yaoxt.fnst@fujitsu.com> wrote: - -> -Hi, all -> -> -When I did the cxl memory hot-plug test on QEMU, I accidentally connected -> -two memdev to the same downstream port, the command like below: -> -> -> -object memory-backend-ram,size=262144k,share=on,id=vmem0 \ -> -> -object memory-backend-ram,size=262144k,share=on,id=vmem1 \ -> -> -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -> -> -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ -> -> -device cxl-upstream,bus=root_port0,id=us0 \ -> -> -device cxl-downstream,port=0,bus=us0,id=swport00,chassis=0,slot=5 \ -> -> -device cxl-downstream,port=0,bus=us0,id=swport01,chassis=0,slot=7 \ -> -same downstream port but has different slot! -> -> -> -device cxl-type3,bus=swport00,volatile-memdev=vmem0,id=cxl-vmem0 \ -> -> -device cxl-type3,bus=swport01,volatile-memdev=vmem1,id=cxl-vmem1 \ -> -> -M -> -> cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=64G,cxl-fmw.0.interleave-granularity=4k -> -> \ -> -> -There is no error occurred when vm start, but when I executed the âcxl listâ -> -command to view -> -the CXL objects info, the process can not end properly. -I'd be happy to look preventing this on QEMU side if you send one, -but in general there are are lots of ways to shoot yourself in the -foot with CXL and PCI device emulation in QEMU so I'm not going -to rush to solve this specific one. - -Likewise, some hardening in kernel / userspace probably makes sense but -this is a non compliant switch so priority of a fix is probably fairly low. - -Jonathan - -> -> -Then I used strace to trace the process, I found that the process is in -> -infinity loop: -> -# strace cxl list -> -...... -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=1000000}, NULL) = 0 -> -openat(AT_FDCWD, "/sys/bus/cxl/flush", O_WRONLY|O_CLOEXEC) = 3 -> -write(3, "1\n\0", 3) = 3 -> -close(3) = 0 -> -access("/run/udev/queue", F_OK) = 0 -> -> -[Environment]: -> -linux: V6.10-rc3 -> -QEMU: V9.0.0 -> -ndctl: v79 -> -> -I know this is because of the wrong use of the QEMU command, but I think we -> -should -> -be aware of this error in one of the QEMU, OS or ndctl side at least. -> -> -Thanks -> -Xingtao - |