diff options
Diffstat (limited to 'gitlab/issues/target_missing/host_missing/accel_missing/819.toml')
| -rw-r--r-- | gitlab/issues/target_missing/host_missing/accel_missing/819.toml | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/gitlab/issues/target_missing/host_missing/accel_missing/819.toml b/gitlab/issues/target_missing/host_missing/accel_missing/819.toml new file mode 100644 index 00000000..deacd2b7 --- /dev/null +++ b/gitlab/issues/target_missing/host_missing/accel_missing/819.toml @@ -0,0 +1,85 @@ +id = 819 +title = "watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [swapper/1:0]" +state = "closed" +created_at = "2022-01-14T14:13:28.390Z" +closed_at = "2023-01-27T13:03:23.834Z" +labels = ["Migration"] +url = "https://gitlab.com/qemu-project/qemu/-/issues/819" +host-os = "proxmox 6.4" +host-arch = "x86_64" +qemu-version = "pve-qemu-kvm_5.2" +guest-os = "Debian 10.10" +guest-arch = "x86_64" +description = """During virtual disk live move/migration, VMs get severe stuttering and even cpu soft lockups, as described here: + +https://bugzilla.kernel.org/show_bug.cgi?id=199727 + +This also happens on some of our virtual machines when i/o load inside VM is high or workload is fsync centric. + +i'm searching for a solution to mitigate this problem, i.e. i can live with the stuttering/delays of several seconds, but getting cpu soft lockups of 22s or higher is inacceptable. + +i have searched the web for a long long time now, but did not find a solution , nor did i find a way on how to troubleshoot this more in depth to find the real root cause. + +if this issue report will not getting accepted because of "non native qemu" (i.e. proxmox platform) , please tell me which qemu/distro i can/should use instead (which has easy usable live migration feature) to try reproducing the problem.""" +reproduce = """1. do a live migration of one or more virtual machine disks +2. watch "ioping -WWWYy test.dat" inside VM (being moved) for disk latency +3. you disk latency is heavily varying , from time to time it goes up to vaues of tens seconds, even leading to kernel messages like " kernel:[ 2155.520846] watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [swapper/1:0]" + +``` +4 KiB >>> test.dat (ext4 /dev/sda1): request=55 time=1.07 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=56 time=1.24 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=57 time=567.4 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=58 time=779.0 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=59 time=589.0 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=60 time=1.57 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=61 time=847.7 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=62 time=933.0 ms +4 KiB >>> test.dat (ext4 /dev/sda1): request=63 time=891.4 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=64 time=820.8 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=65 time=1.02 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=66 time=2.44 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=67 time=620.7 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=68 time=1.03 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=69 time=1.24 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=70 time=1.42 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=71 time=1.36 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=72 time=1.41 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=73 time=1.33 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=74 time=2.36 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=75 time=1.46 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=76 time=1.45 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=77 time=1.28 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=78 time=1.41 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=79 time=2.33 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=80 time=1.39 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=81 time=1.35 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=82 time=1.54 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=83 time=1.52 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=84 time=1.50 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=85 time=2.00 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=86 time=1.47 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=87 time=1.26 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=88 time=1.29 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=89 time=2.05 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=90 time=1.44 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=91 time=1.43 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=92 time=1.72 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=93 time=1.77 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=94 time=2.56 s + +Message from syslogd@iotest2 at Jan 14 14:51:12 ... + kernel:[ 2155.520846] watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [swapper/1:0] +4 KiB >>> test.dat (ext4 /dev/sda1): request=95 time=22.5 s (slow) +4 KiB >>> test.dat (ext4 /dev/sda1): request=96 time=3.56 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=97 time=1.52 s (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=98 time=1.69 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=99 time=1.90 s +4 KiB >>> test.dat (ext4 /dev/sda1): request=100 time=1.15 s (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=101 time=890.0 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=102 time=959.6 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=103 time=926.5 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=104 time=791.5 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=105 time=577.8 ms (fast) +4 KiB >>> test.dat (ext4 /dev/sda1): request=106 time=867.7 ms (fast) +```""" +additional = "n/a" |