Diffstat (limited to 'results/classifier/012/kernel virtual machine')
-rw-r--r--  results/classifier/012/kernel virtual machine/04472277 |  594
-rw-r--r--  results/classifier/012/kernel virtual machine/23270873 |  710
-rw-r--r--  results/classifier/012/kernel virtual machine/36568044 | 4599
-rw-r--r--  results/classifier/012/kernel virtual machine/60339453 |   79
-rw-r--r--  results/classifier/012/kernel virtual machine/71456293 | 1504
-rw-r--r--  results/classifier/012/kernel virtual machine/80615920 |  366
6 files changed, 7852 insertions, 0 deletions
diff --git a/results/classifier/012/kernel virtual machine/04472277 b/results/classifier/012/kernel virtual machine/04472277
new file mode 100644
index 00000000..7acc17f3
--- /dev/null
+++ b/results/classifier/012/kernel virtual machine/04472277
@@ -0,0 +1,594 @@
kernel virtual machine: 0.902
register: 0.886
risc-v: 0.864
architecture: 0.857
permissions: 0.851
device: 0.849
debug: 0.849
network: 0.847
graphic: 0.846
other: 0.846
x86: 0.841
performance: 0.841
assembly: 0.841
boot: 0.831
vnc: 0.828
PID: 0.826
TCG: 0.825
socket: 0.824
arm: 0.821
mistranslation: 0.817
semantic: 0.815
files: 0.790

[BUG][KVM_SET_USER_MEMORY_REGION] KVM_SET_USER_MEMORY_REGION failed

Hi all,
I started a VM in OpenStack (OpenStack uses libvirt to start the QEMU VM), but the log now shows this ERROR.
Does anyone know about this?
The ERROR log is from /var/log/libvirt/qemu/instance-0000000e.log:
```
2023-03-14T10:09:17.674114Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument
kvm_set_phys_mem: error registering slot: Invalid argument
2023-03-14 10:09:18.198+0000: shutting down, reason=crashed
```
The xml file
```
root@c1c2:~# cat /etc/libvirt/qemu/instance-0000000e.xml
<!--
WARNING: THIS IS AN AUTO-GENERATED FILE. CHANGES TO IT ARE LIKELY TO BE
OVERWRITTEN AND LOST. Changes to this xml configuration should be made using:
 virsh edit instance-0000000e
or other application using the libvirt API.
-->
<domain type='kvm'>
 <name>instance-0000000e</name>
 <uuid>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</uuid>
 <metadata>
  <nova:instance xmlns:nova="http://openstack.org/xmlns/libvirt/nova/1.1">
   <nova:package version="25.1.0"/>
   <nova:name>provider-instance</nova:name>
   <nova:creationTime>2023-03-14 10:09:13</nova:creationTime>
   <nova:flavor name="cirros-os-dpu-test-1">
    <nova:memory>64</nova:memory>
    <nova:disk>1</nova:disk>
    <nova:swap>0</nova:swap>
    <nova:ephemeral>0</nova:ephemeral>
    <nova:vcpus>1</nova:vcpus>
   </nova:flavor>
   <nova:owner>
    <nova:user uuid="ff627ad39ed94479b9c5033bc462cf78">admin</nova:user>
    <nova:project uuid="512866f9994f4ad8916d8539a7cdeec9">admin</nova:project>
   </nova:owner>
   <nova:root type="image" uuid="9e58cb69-316a-4093-9f23-c1d1bd8edffe"/>
   <nova:ports>
    <nova:port uuid="77c1dc00-af39-4463-bea0-12808f4bc340">
     <nova:ip type="fixed" address="172.1.1.43" ipVersion="4"/>
    </nova:port>
   </nova:ports>
  </nova:instance>
 </metadata>
 <memory unit='KiB'>65536</memory>
 <currentMemory unit='KiB'>65536</currentMemory>
 <vcpu placement='static'>1</vcpu>
 <sysinfo type='smbios'>
  <system>
   <entry name='manufacturer'>OpenStack Foundation</entry>
   <entry name='product'>OpenStack Nova</entry>
   <entry name='version'>25.1.0</entry>
   <entry name='serial'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry>
   <entry name='uuid'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry>
   <entry name='family'>Virtual Machine</entry>
  </system>
 </sysinfo>
 <os>
  <type arch='x86_64' machine='pc-i440fx-6.2'>hvm</type>
  <boot dev='hd'/>
  <smbios mode='sysinfo'/>
 </os>
 <features>
  <acpi/>
  <apic/>
  <vmcoreinfo state='on'/>
 </features>
 <cpu mode='host-model' check='partial'>
  <topology sockets='1' dies='1' cores='1' threads='1'/>
 </cpu>
 <clock offset='utc'>
  <timer name='pit' tickpolicy='delay'/>
  <timer name='rtc' tickpolicy='catchup'/>
  <timer name='hpet' present='no'/>
 </clock>
 <on_poweroff>destroy</on_poweroff>
 <on_reboot>restart</on_reboot>
 <on_crash>destroy</on_crash>
 <devices>
  <emulator>/usr/bin/qemu-system-x86_64</emulator>
  <disk type='file' device='disk'>
   <driver name='qemu' type='qcow2' cache='none'/>
   <source file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk'/>
   <target dev='vda' bus='virtio'/>
   <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
  </disk>
  <controller type='usb' index='0' model='piix3-uhci'>
   <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
  </controller>
  <controller type='pci' index='0' model='pci-root'/>
  <interface type='hostdev' managed='yes'>
   <mac address='fa:16:3e:aa:d9:23'/>
   <source>
    <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x5'/>
   </source>
   <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
  </interface>
  <serial type='pty'>
   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/>
   <target type='isa-serial' port='0'>
    <model name='isa-serial'/>
   </target>
  </serial>
  <console type='pty'>
   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/>
   <target type='serial' port='0'/>
  </console>
  <input type='tablet' bus='usb'>
   <address type='usb' bus='0' port='1'/>
  </input>
  <input type='mouse' bus='ps2'/>
  <input type='keyboard' bus='ps2'/>
  <graphics type='vnc' port='-1' autoport='yes' listen='0.0.0.0'>
   <listen type='address' address='0.0.0.0'/>
  </graphics>
  <audio id='1' type='none'/>
  <video>
   <model type='virtio' heads='1' primary='yes'/>
   <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
  </video>
  <hostdev mode='subsystem' type='pci' managed='yes'>
   <source>
    <address domain='0x0000' bus='0x01' slot='0x00' function='0x6'/>
   </source>
   <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
  </hostdev>
  <memballoon model='virtio'>
   <stats period='10'/>
   <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/>
  </memballoon>
  <rng model='virtio'>
   <backend model='random'>/dev/urandom</backend>
   <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/>
  </rng>
 </devices>
</domain>
```
----
Simon Jones

This happened on Ubuntu 22.04.
QEMU was installed via apt like this:
apt install -y qemu qemu-kvm qemu-system
and the QEMU version is 6.2.0
----
Simon Jones

Simon Jones <batmanustc@gmail.com> wrote on Tue, Mar 21, 2023 at 08:40:
<snip>

This is the full ERROR log
2023-03-23 08:00:52.362+0000: starting up libvirt version: 8.0.0, package: 1ubuntu7.4 (Christian Ehrhardt <christian.ehrhardt@canonical.com> Tue, 22 Nov 2022 15:59:28 +0100), qemu version: 6.2.0Debian 1:6.2+dfsg-2ubuntu6.6, kernel: 5.19.0-35-generic, hostname: c1c2
LC_ALL=C \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin \
HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e \
XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.local/share \
XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.cache \
XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.config \
/usr/bin/qemu-system-x86_64 \
-name guest=instance-0000000e,debug-threads=on \
-S \
-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-4-instance-0000000e/master-key.aes"}' \
-machine pc-i440fx-6.2,usb=off,dump-guest-core=off,memory-backend=pc.ram \
-accel kvm \
-cpu Cooperlake,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,sha-ni=on,umip=on,waitpkg=on,gfni=on,vaes=on,vpclmulqdq=on,rdpid=on,movdiri=on,movdir64b=on,fsrm=on,md-clear=on,avx-vnni=on,xsaves=on,ibpb=on,ibrs=on,amd-stibp=on,amd-ssbd=on,hle=off,rtm=off,avx512f=off,avx512dq=off,avx512cd=off,avx512bw=off,avx512vl=off,avx512vnni=off,avx512-bf16=off,taa-no=off \
-m 64 \
-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":67108864}' \
-overcommit mem-lock=off \
-smp 1,sockets=1,dies=1,cores=1,threads=1 \
-uuid ff91d2dc-69a1-43ef-abde-c9e4e9a0305b \
-smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=25.1.0,serial=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,uuid=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,family=Virtual Machine' \
-no-user-config \
-nodefaults \
-chardev socket,id=charmonitor,fd=33,server=on,wait=off \
-mon chardev=charmonitor,id=monitor,mode=control \
-rtc base=utc,driftfix=slew \
-global kvm-pit.lost_tick_policy=delay \
-no-hpet \
-no-shutdown \
-boot strict=on \
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
-blockdev '{"driver":"file","filename":"/var/lib/nova/instances/_base/8b58db82a488248e7c5e769599954adaa47a5314","node-name":"libvirt-2-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
-blockdev '{"node-name":"libvirt-2-format","read-only":true,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-2-storage"}' \
-blockdev
'{"driver":"file","filename":"/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \ +-blockdev '{"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":"libvirt-2-format"}' \ +-device virtio-blk-pci,bus=pci.0,addr=0x3,drive=libvirt-1-format,id=virtio-disk0,bootindex=1,write-cache=on \ +-add-fd set=1,fd=34 \ +-chardev pty,id=charserial0,logfile=/dev/fdset/1,logappend=on \ +-device isa-serial,chardev=charserial0,id=serial0 \ +-device usb-tablet,id=input0,bus=usb.0,port=1 \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-vnc +0.0.0.0:0 +,audiodev=audio1 \ +-device virtio-vga,id=video0,max_outputs=1,bus=pci.0,addr=0x2 \ +-device vfio-pci,host=0000:01:00.5,id=hostdev0,bus=pci.0,addr=0x4 \ +-device vfio-pci,host=0000:01:00.6,id=hostdev1,bus=pci.0,addr=0x5 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 \ +-object '{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}' \ +-device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.0,addr=0x7 \ +-device vmcoreinfo \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on +char device redirected to /dev/pts/3 (label charserial0) +2023-03-23T08:00:53.728550Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument +kvm_set_phys_mem: error registering slot: Invalid argument +2023-03-23 08:00:54.201+0000: shutting down, reason=crashed +2023-03-23 08:54:43.468+0000: starting up libvirt version: 8.0.0, package: 1ubuntu7.4 (Christian Ehrhardt < +christian.ehrhardt@canonical.com +> Tue, 22 Nov 2022 15:59:28 +0100), qemu version: 6.2.0Debian 1:6.2+dfsg-2ubuntu6.6, kernel: 5.19.0-35-generic, hostname: c1c2 +LC_ALL=C \ +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin \ +HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=instance-0000000e,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-5-instance-0000000e/master-key.aes"}' \ +-machine pc-i440fx-6.2,usb=off,dump-guest-core=off,memory-backend=pc.ram \ +-accel kvm \ +-cpu Cooperlake,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,sha-ni=on,umip=on,waitpkg=on,gfni=on,vaes=on,vpclmulqdq=on,rdpid=on,movdiri=on,movdir64b=on,fsrm=on,md-clear=on,avx-vnni=on,xsaves=on,ibpb=on,ibrs=on,amd-stibp=on,amd-ssbd=on,hle=off,rtm=off,avx512f=off,avx512dq=off,avx512cd=off,avx512bw=off,avx512vl=off,avx512vnni=off,avx512-bf16=off,taa-no=off \ +-m 64 \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":67108864}' \ +-overcommit mem-lock=off \ +-smp 1,sockets=1,dies=1,cores=1,threads=1 \ +-uuid ff91d2dc-69a1-43ef-abde-c9e4e9a0305b \ +-smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=25.1.0,serial=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,uuid=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,family=Virtual Machine' \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=33,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ 
-rtc base=utc,driftfix=slew \
-global kvm-pit.lost_tick_policy=delay \
-no-hpet \
-no-shutdown \
-boot strict=on \
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
-blockdev '{"driver":"file","filename":"/var/lib/nova/instances/_base/8b58db82a488248e7c5e769599954adaa47a5314","node-name":"libvirt-2-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
-blockdev '{"node-name":"libvirt-2-format","read-only":true,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-2-storage"}' \
-blockdev '{"driver":"file","filename":"/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
-blockdev '{"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":"libvirt-2-format"}' \
-device virtio-blk-pci,bus=pci.0,addr=0x3,drive=libvirt-1-format,id=virtio-disk0,bootindex=1,write-cache=on \
-add-fd set=1,fd=34 \
-chardev pty,id=charserial0,logfile=/dev/fdset/1,logappend=on \
-device isa-serial,chardev=charserial0,id=serial0 \
-device usb-tablet,id=input0,bus=usb.0,port=1 \
-audiodev '{"id":"audio1","driver":"none"}' \
-vnc 0.0.0.0:0,audiodev=audio1 \
-device virtio-vga,id=video0,max_outputs=1,bus=pci.0,addr=0x2 \
-device vfio-pci,host=0000:01:00.5,id=hostdev0,bus=pci.0,addr=0x4 \
-device vfio-pci,host=0000:01:00.6,id=hostdev1,bus=pci.0,addr=0x5 \
-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 \
-object '{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}' \
-device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.0,addr=0x7 \
-device vmcoreinfo \
-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
-msg timestamp=on
char device redirected to /dev/pts/3 (label charserial0)
2023-03-23T08:54:44.755039Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument
kvm_set_phys_mem: error registering slot: Invalid argument
2023-03-23 08:54:45.230+0000: shutting down, reason=crashed
----
Simon Jones

Simon Jones <batmanustc@gmail.com> wrote on Thu, Mar 23, 2023 at 05:49:
<snip>
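For context on the failure in this thread: the message is emitted by QEMU's wrapper around the KVM_SET_USER_MEMORY_REGION ioctl when the kernel refuses a memory slot. Below is a minimal sketch of that call, assuming only a Linux host with the KVM UAPI headers; it is not QEMU's actual code. One plausible reading of the log, given the vfio-pci passthrough devices in the XML, is that the slot-4 region at 0xfffffffffe000000 (a device ROM/BAR mapping near the top of the 64-bit space) ends beyond the guest-physical range the running 5.19 kernel will accept, which KVM reports as Invalid argument.

```
/* Minimal sketch of the failing call; not QEMU's actual code. */
#include <errno.h>
#include <inttypes.h>
#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

/* KVM returns EINVAL if guest_phys_addr, memory_size or userspace_addr
 * are not page-aligned, if the slot overlaps another slot, or if the
 * region ends beyond the guest-physical range the kernel can map. */
static int set_user_memory_region(int vm_fd, uint32_t slot, uint64_t gpa,
                                  uint64_t size, void *hva)
{
    struct kvm_userspace_memory_region region = {
        .slot            = slot,
        .flags           = 0,
        .guest_phys_addr = gpa,
        .memory_size     = size,
        .userspace_addr  = (uintptr_t)hva,
    };

    if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0) {
        fprintf(stderr,
                "KVM_SET_USER_MEMORY_REGION failed, slot=%u, "
                "start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s\n",
                slot, gpa, size, strerror(errno));
        return -errno;
    }
    return 0;
}
```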
diff --git a/results/classifier/012/kernel virtual machine/23270873 b/results/classifier/012/kernel virtual machine/23270873
new file mode 100644
index 00000000..5083f474
--- /dev/null
+++ b/results/classifier/012/kernel virtual machine/23270873
@@ -0,0 +1,710 @@
kernel virtual machine: 0.908
mistranslation: 0.881
risc-v: 0.859
other: 0.839
boot: 0.830
TCG: 0.828
vnc: 0.820
device: 0.810
permissions: 0.802
register: 0.797
debug: 0.788
assembly: 0.768
network: 0.768
graphic: 0.764
arm: 0.761
socket: 0.758
semantic: 0.752
performance: 0.744
architecture: 0.742
PID: 0.731
x86: 0.730
files: 0.730

[Qemu-devel] [BUG?] aio_get_linux_aio: Assertion `ctx->linux_aio' failed

Hi,

I am seeing some strange QEMU assertion failures for qemu on s390x,
which prevent a guest from starting.

Git bisecting points to the following commit as the source of the error.

commit ed6e2161715c527330f936d44af4c547f25f687e
Author: Nishanth Aravamudan <address@hidden>
Date: Fri Jun 22 12:37:00 2018 -0700

    linux-aio: properly bubble up errors from initialization

    laio_init() can fail for a couple of reasons, which will lead to a NULL
    pointer dereference in laio_attach_aio_context().

    To solve this, add a aio_setup_linux_aio() function which is called
    early in raw_open_common. If this fails, propagate the error up. The
    signature of aio_get_linux_aio() was not modified, because it seems
    preferable to return the actual errno from the possible failing
    initialization calls.

    Additionally, when the AioContext changes, we need to associate a
    LinuxAioState with the new AioContext. Use the bdrv_attach_aio_context
    callback and call the new aio_setup_linux_aio(), which will allocate a
    new AioContext if needed, and return errors on failures. If it fails for
    any reason, fallback to threaded AIO with an error message, as the
    device is already in-use by the guest.

    Add an assert that aio_get_linux_aio() cannot return NULL.

    Signed-off-by: Nishanth Aravamudan <address@hidden>
    Message-id: address@hidden
    Signed-off-by: Stefan Hajnoczi <address@hidden>

Not sure what is causing this assertion to fail. Here is the qemu
command line of the guest, from the qemu log, which throws this error:

LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin
QEMU_AUDIO_DRV=none /usr/local/bin/qemu-system-s390x -name
guest=rt_vm1,debug-threads=on -S -object
secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-21-rt_vm1/master-key.aes
-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off -m
1024 -realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -object
iothread,id=iothread1 -uuid 0cde16cd-091d-41bd-9ac2-5243df5c9a0d
-display none -no-user-config -nodefaults -chardev
socket,id=charmonitor,fd=28,server,nowait -mon
chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown
-boot strict=on -drive
file=/dev/mapper/360050763998b0883980000002a000031,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native
-device
virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on
-netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device
virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:3a:c8:67:95:84,devno=fe.0.0000
-netdev tap,fd=32,id=hostnet1,vhost=on,vhostfd=33 -device
virtio-net-ccw,netdev=hostnet1,id=net1,mac=52:54:00:2a:e5:08,devno=fe.0.0002
-chardev pty,id=charconsole0 -device
sclpconsole,chardev=charconsole0,id=console0 -device
virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -sandbox
on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
-msg timestamp=on

2018-07-17 15:48:42.252+0000: Domain id=21 is tainted: high-privileges
2018-07-17T15:48:42.279380Z qemu-system-s390x: -chardev pty,id=charconsole0:
char device redirected to /dev/pts/3 (label charconsole0)
qemu-system-s390x: util/async.c:339: aio_get_linux_aio: Assertion
`ctx->linux_aio' failed.
2018-07-17 15:48:43.309+0000: shutting down, reason=failed

Any help debugging this would be greatly appreciated.

Thank you
Farhan

On 17.07.2018 [13:25:53 -0400], Farhan Ali wrote:
<snip>

iiuc, this possibly implies AIO was not actually used previously on this
guest (it might have silently been falling back to threaded IO?). I
don't have access to s390x, but would it be possible to run qemu under
gdb and see if aio_setup_linux_aio is being called at all (I think it
might not be, but I'm not sure why), and if so, if it's for the context
in question?

If it's not being called first, could you see what callpath is calling
aio_get_linux_aio when this assertion trips?

Thanks!
-Nish
Hi Nishant,

From the coredump of the guest this is the call trace that calls
aio_get_linux_aio:

Stack trace of thread 145158:
#0  0x000003ff94dbe274 raise (libc.so.6)
#1  0x000003ff94da39a8 abort (libc.so.6)
#2  0x000003ff94db62ce __assert_fail_base (libc.so.6)
#3  0x000003ff94db634c __assert_fail (libc.so.6)
#4  0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x)
#5  0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x)
#6  0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x)
#7  0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x)
#8  0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x)
#9  0x000002aa20db3c34 aio_poll (qemu-system-s390x)
#10 0x000002aa20be32a2 iothread_run (qemu-system-s390x)
#11 0x000003ff94f879a8 start_thread (libpthread.so.0)
#12 0x000003ff94e797ee thread_start (libc.so.6)

Thanks for taking a look and responding.

Thanks
Farhan

On 07/18/2018 09:42 AM, Farhan Ali wrote:
<snip>

Trying to debug a little further, the block device in this case is a
"host device". And looking at your commit carefully you use the
bdrv_attach_aio_context callback to setup a Linux AioContext.
For some reason the "host device" struct (BlockDriver bdrv_host_device
in block/file-posix.c) does not have a bdrv_attach_aio_context defined.
So a simple change of adding the callback to the struct solves the issue
and the guest starts fine.

diff --git a/block/file-posix.c b/block/file-posix.c
index 28824aa..b8d59fb 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = {
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
     .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_attach_aio_context = raw_aio_attach_aio_context,

     .bdrv_co_truncate = raw_co_truncate,
     .bdrv_getlength = raw_getlength,

I am not too familiar with block device code in QEMU, so not sure if
this is the right fix or if there are some underlying problems.

Thanks
Farhan
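Farhan's one-line addition makes sense given how the block layer hands a new AioContext to a driver. Schematically (a simplified sketch of QEMU's bdrv_attach_aio_context() dispatch, with unrelated bookkeeping elided; not the verbatim source):

```
/* Simplified sketch of block.c's AioContext hand-off; not verbatim. */
void bdrv_attach_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    /* ... walk children, move timers and bottom halves, etc. ... */

    if (bs->drv && bs->drv->bdrv_attach_aio_context) {
        /* For file-posix this is raw_aio_attach_aio_context(), which
         * runs aio_setup_linux_aio() for new_context. */
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }

    /* If the BlockDriver leaves this hook NULL -- as bdrv_host_device
     * did before the fix -- the iothread's AioContext never gets a
     * LinuxAioState, and the next raw_aio_plug() there trips the
     * assertion seen in the stack trace. */
}
```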
On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote:
<snip>

Oh this is quite embarrassing! I only added the bdrv_attach_aio_context
callback for the file-backed device. Your fix is definitely correct for
the host device. Let me make sure there weren't any others missed and I will
send out a properly formatted patch. Thank you for the quick testing and
turnaround!

-Nish

On 07/18/2018 08:52 PM, Nishanth Aravamudan wrote:
<snip>

Farhan, can you respin your patch with proper sign-off and patch description?
Adding qemu-block.
Hi Christian,

On 19.07.2018 [08:55:20 +0200], Christian Borntraeger wrote:
<snip>
> Farhan, can you respin your patch with proper sign-off and patch description?
> Adding qemu-block.

I sent it yesterday, sorry I didn't cc everyone from this e-mail:
http://lists.nongnu.org/archive/html/qemu-block/2018-07/msg00516.html

Thanks,
Nish

diff --git a/results/classifier/012/kernel virtual machine/36568044 b/results/classifier/012/kernel virtual machine/36568044
new file mode 100644
index 00000000..4f8642bb
--- /dev/null
+++ b/results/classifier/012/kernel virtual machine/36568044
@@ -0,0 +1,4599 @@
kernel virtual machine: 0.962
mistranslation: 0.962
risc-v: 0.960
debug: 0.939
arm: 0.934
device: 0.931
graphic: 0.931
other: 0.930
permissions: 0.927
assembly: 0.926
PID: 0.926
architecture: 0.925
register: 0.924
semantic: 0.923
performance: 0.920
socket: 0.907
vnc: 0.905
network: 0.904
boot: 0.895
TCG: 0.889
files: 0.884
x86: 0.805

[BUG, RFC] cpr-transfer: qxl guest driver crashes after migration

Hi all,

We've been experimenting with cpr-transfer migration mode recently and
have discovered the following issue with the guest QXL driver:

Run migration source:
> EMULATOR=/path/to/emulator
> ROOTFS=/path/to/image
> QMPSOCK=/var/run/alma8qmp-src.sock
>
> $EMULATOR -enable-kvm \
>     -machine q35 \
>     -cpu host -smp 2 -m 2G \
>     -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on \
>     -machine memory-backend=ram0 \
>     -machine aux-ram-share=on \
>     -drive file=$ROOTFS,media=disk,if=virtio \
>     -qmp unix:$QMPSOCK,server=on,wait=off \
>     -nographic \
>     -device qxl-vga

Run migration target:
> EMULATOR=/path/to/emulator
> ROOTFS=/path/to/image
> QMPSOCK=/var/run/alma8qmp-dst.sock
>
> $EMULATOR -enable-kvm \
>     -machine q35 \
>     -cpu host -smp 2 -m 2G \
>     -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on \
>     -machine memory-backend=ram0 \
>     -machine aux-ram-share=on \
>     -drive file=$ROOTFS,media=disk,if=virtio \
>     -qmp unix:$QMPSOCK,server=on,wait=off \
>     -nographic \
>     -device qxl-vga \
>     -incoming tcp:0:44444 \
>     -incoming '{"channel-type": "cpr", "addr": { "transport": "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}'

Launch the migration:
> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell
> QMPSOCK=/var/run/alma8qmp-src.sock
>
> $QMPSHELL -p $QMPSOCK <<EOF
> migrate-set-parameters mode=cpr-transfer
> migrate channels=[{"channel-type":"main","addr":{"transport":"socket","type":"inet","host":"0","port":"44444"}},{"channel-type":"cpr","addr":{"transport":"socket","type":"unix","path":"/var/run/alma8cpr-dst.sock"}}]
> EOF

Then, after a while, the QXL guest driver on the target crashes, spewing the
following messages:
> [ 73.962002] [TTM] Buffer eviction failed
> [ 73.962072] qxl 0000:00:02.0: object_init failed for (3149824, 0x00000001)
> [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to allocate VRAM BO

That seems to be a known kernel QXL driver bug:
https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/
https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/

(the latter discussion contains a reproducer script which speeds up
the crash in the guest):
> #!/bin/bash
>
> chvt 3
>
> for j in $(seq 80); do
>         echo "$(date) starting round $j"
>         if [ "$(journalctl --boot | grep "failed to allocate VRAM BO")" != "" ]; then
>                 echo "bug was reproduced after $j tries"
>                 exit 1
>         fi
>         for i in $(seq 100); do
>                 dmesg > /dev/tty3
>         done
> done
>
> echo "bug could not be reproduced"
> exit 0

The bug itself seems to remain unfixed, as I was able to reproduce that
with a Fedora 41 guest, as well as an AlmaLinux 8 guest. However our
cpr-transfer code also seems to be buggy as it triggers the crash -
without the cpr-transfer migration the above reproducer doesn't lead to
a crash on the source VM.

I suspect that, as cpr-transfer doesn't migrate the guest memory, but
rather passes it through the memory backend object, our code might
somehow corrupt the VRAM. However, I wasn't able to trace the
corruption so far.

Could somebody help the investigation and take a look into this? Any
suggestions would be appreciated. Thanks!

Andrey

On 2/28/2025 12:39 PM, Andrey Drobyshev wrote:
<snip>

Possibly some memory region created by qxl is not being preserved.
Try adding these traces to see what is preserved:

-trace enable='*cpr*'
-trace enable='*ram_alloc*'

- Steve
+(the latter discussion contains that reproduce script which speeds up +the crash in the guest): +#!/bin/bash + +chvt 3 + +for j in $(seq 80); do +        echo "$(date) starting round $j" +        if [ "$(journalctl --boot | grep "failed to allocate VRAM BO")" != "" +]; then +                echo "bug was reproduced after $j tries" +                exit 1 +        fi +        for i in $(seq 100); do +                dmesg > /dev/tty3 +        done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM. However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into this? Any +suggestions would be appreciated. Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr. A message is printed at migration start time. +1740667681-257312-1-git-send-email-steven.sistare@oracle.com +/">https://lore.kernel.org/qemu-devel/ +1740667681-257312-1-git-send-email-steven.sistare@oracle.com +/ +- Steve + +On 2/28/25 8:20 PM, Steven Sistare wrote: +> +On 2/28/2025 1:13 PM, Steven Sistare wrote: +> +> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +> +>> Hi all, +> +>> +> +>> We've been experimenting with cpr-transfer migration mode recently and +> +>> have discovered the following issue with the guest QXL driver: +> +>> +> +>> Run migration source: +> +>>> EMULATOR=/path/to/emulator +> +>>> ROOTFS=/path/to/image +> +>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>> +> +>>> $EMULATOR -enable-kvm \ +> +>>>     -machine q35 \ +> +>>>     -cpu host -smp 2 -m 2G \ +> +>>>     -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>> ram0,share=on\ +> +>>>     -machine memory-backend=ram0 \ +> +>>>     -machine aux-ram-share=on \ +> +>>>     -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>     -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>     -nographic \ +> +>>>     -device qxl-vga +> +>> +> +>> Run migration target: +> +>>> EMULATOR=/path/to/emulator +> +>>> ROOTFS=/path/to/image +> +>>> QMPSOCK=/var/run/alma8qmp-dst.sock +> +>>> $EMULATOR -enable-kvm \ +> +>>>     -machine q35 \ +> +>>>     -cpu host -smp 2 -m 2G \ +> +>>>     -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>> ram0,share=on\ +> +>>>     -machine memory-backend=ram0 \ +> +>>>     -machine aux-ram-share=on \ +> +>>>     -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>     -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>     -nographic \ +> +>>>     -device qxl-vga \ +> +>>>     -incoming tcp:0:44444 \ +> +>>>     -incoming '{"channel-type": "cpr", "addr": { "transport": +> +>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +> +>> +> +>> +> +>> Launch the migration: +> +>>> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +> +>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>> +> +>>> $QMPSHELL -p $QMPSOCK <<EOF +> +>>>     migrate-set-parameters mode=cpr-transfer +> +>>>   
On 2/28/25 8:20 PM, Steven Sistare wrote:
> On 2/28/2025 1:13 PM, Steven Sistare wrote:
>> Possibly some memory region created by qxl is not being preserved.
>> Try adding these traces to see what is preserved:
>>
>> -trace enable='*cpr*'
>> -trace enable='*ram_alloc*'
>
> Also try adding this patch to see if it flags any ram blocks as not
> compatible with cpr. A message is printed at migration start time.
> [...]
With the traces enabled + the "migration: ram block cpr blockers" patch
applied:

Source:

cpr_find_fd pc.bios, id 0 returns -1
cpr_save_fd pc.bios, id 0, fd 22
qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host 0x7fec18e00000
cpr_find_fd pc.rom, id 0 returns -1
cpr_save_fd pc.rom, id 0, fd 23
qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host 0x7fec18c00000
cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1
cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24
qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd 24 host 0x7fec18a00000
cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1
cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25
qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 fd 25 host 0x7feb77e00000
cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1
cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27
qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 27 host 0x7fec18800000
cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1
cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28
qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 fd 28 host 0x7feb73c00000
cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1
cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34
qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 34 host 0x7fec18600000
cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1
cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35
qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 35 host 0x7fec18200000
cpr_find_fd /rom@etc/table-loader, id 0 returns -1
cpr_save_fd /rom@etc/table-loader, id 0, fd 36
qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 36 host 0x7feb8b600000
cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1
cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37
qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 37 host 0x7feb8b400000

cpr_state_save cpr-transfer mode
cpr_transfer_output /var/run/alma8cpr-dst.sock

Target:

cpr_transfer_input /var/run/alma8cpr-dst.sock
cpr_state_load cpr-transfer mode
cpr_find_fd pc.bios, id 0 returns 20
qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host 0x7fcdc9800000
cpr_find_fd pc.rom, id 0 returns 19
qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host 0x7fcdc9600000
cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18
qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd 18 host 0x7fcdc9400000
cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17
qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 fd 17 host 0x7fcd27e00000
cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16
qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 16 host 0x7fcdc9200000
cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15
qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 fd 15 host 0x7fcd23c00000
cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14
qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 14 host 0x7fcdc8800000
cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13
qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 13 host 0x7fcdc8400000
cpr_find_fd /rom@etc/table-loader, id 0 returns 11
qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 11 host 0x7fcdc8200000
cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10
qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 10 host 0x7fcd3be00000
Looks like both vga.vram and qxl.vram are being preserved (with the same
addresses), and no incompatible ram blocks are found during migration.

Andrey
On 2/28/25 8:35 PM, Andrey Drobyshev wrote:
> Looks like both vga.vram and qxl.vram are being preserved (with the same
> addresses), and no incompatible ram blocks are found during migration.
Sorry, addresses are not the same, of course. However, the corresponding
ram blocks do seem to be preserved and initialized.
On 2/28/2025 1:37 PM, Andrey Drobyshev wrote:
> [...]
> Sorry, addresses are not the same, of course. However, the corresponding
> ram blocks do seem to be preserved and initialized.
So far, I have not reproduced the guest driver failure.

However, I have isolated places where new QEMU improperly writes to
the qxl memory regions prior to starting the guest, by mmap'ing them
readonly after cpr:

  qemu_ram_alloc_internal()
      if (reused && (strstr(name, "qxl") || strstr(name, "vga")))
          ram_flags |= RAM_READONLY;
      new_block = qemu_ram_alloc_from_fd(...)

I have attached a draft fix; try it and let me know.
My console window looks fine before and after cpr, using
-vnc $hostip:0 -vga qxl

- Steve

0001-hw-qxl-cpr-support-preliminary.patch
Description:
Text document
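The read-only mapping trick described above can be modeled outside QEMU.
The following minimal, self-contained C sketch is illustrative only (it
is not the attached patch; the shm object name is invented): a shared
file is initialized through a writable mapping, standing in for old
QEMU, then mapped PROT_READ in the role of new QEMU, so any stray write
faults with SIGSEGV instead of silently corrupting preserved memory.

/* ro_map_demo.c: model of the read-only mapping debug trick.
 * Build: cc ro_map_demo.c -o ro_map_demo (older glibc may need -lrt).
 * The final write through the read-only mapping dies with SIGSEGV,
 * which is the point: stray writes are caught, not silent. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    const size_t size = 4096;
    int fd = shm_open("/vram-demo", O_CREAT | O_RDWR, 0600);
    if (fd < 0 || ftruncate(fd, size) != 0) {
        perror("shm_open/ftruncate");
        return 1;
    }

    /* Old QEMU's view: writable, initialized before the handoff. */
    char *rw = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (rw == MAP_FAILED) { perror("mmap rw"); return 1; }
    strcpy(rw, "preserved by cpr");

    /* New QEMU's view: mapped read-only to flag improper writes. */
    char *ro = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
    if (ro == MAP_FAILED) { perror("mmap ro"); return 1; }
    printf("read through ro mapping: %s\n", ro);  /* reads are fine */

    shm_unlink("/vram-demo");
    ro[0] = 'X';   /* intentional: faults immediately with SIGSEGV */
    return 0;
}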
On 3/4/25 9:05 PM, Steven Sistare wrote:
> [...]
> However, I have isolated places where new QEMU improperly writes to
> the qxl memory regions prior to starting the guest, by mmap'ing them
> readonly after cpr: [...]
>
> I have attached a draft fix; try it and let me know.
> My console window looks fine before and after cpr, using
> -vnc $hostip:0 -vga qxl
Regarding the reproducer: when I launch the buggy version with the same
options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer,
my VNC client silently hangs on the target after a while. Does that
happen on your stand as well? Could you try launching the VM with
"-nographic -device qxl-vga"? That way the VM's serial console is given
to you directly in the shell, so when the qxl driver crashes you're
still able to inspect the kernel messages.

As for your patch, I can report that it doesn't resolve the issue as it
is. But I was able to track down another possible memory corruption
using your approach with readonly mmap'ing:
Program terminated with signal SIGSEGV, Segmentation fault.
#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412
412         d->ram->magic = cpu_to_le32(QXL_RAM_MAGIC);
[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))]
(gdb) bt
#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412
#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142
#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257
#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174
#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494
#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, v=0x5638996f3770, name=0x56389759b141 "realized", opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) at ../qom/object.c:2374
#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) at ../qom/object.c:1449
#7  0x00005638970f8586 in object_property_set_qobject (obj=0x5638996e0e70, name=0x56389759b141 "realized", value=0x5638996df900, errp=0x7ffd3c2b84e0) at ../qom/qom-qobject.c:28
#8  0x00005638970f3d8d in object_property_set_bool (obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, errp=0x7ffd3c2b84e0) at ../qom/object.c:1519
#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276
#10 0x0000563896dba675 in qdev_device_add_from_qdict (opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../system/qdev-monitor.c:714
#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, errp=0x56389855dc40 <error_fatal>) at ../system/qdev-monitor.c:733
#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, opts=0x563898786150, errp=0x56389855dc40 <error_fatal>) at ../system/vl.c:1207
#13 0x000056389737a6cc in qemu_opts_foreach (list=0x563898427b60 <qemu_device_opts>, func=0x563896dc48ca <device_init_func>, opaque=0x0, errp=0x56389855dc40 <error_fatal>) at ../util/qemu-option.c:1135
#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/vl.c:2745
#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 <error_fatal>) at ../system/vl.c:2806
#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) at ../system/vl.c:3838
#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../system/main.c:72

So the attached adjusted version of your patch does seem to help. At
least I can't reproduce the crash on my stand.

I'm wondering, could it be useful to explicitly mark all the reused
memory regions readonly upon cpr-transfer, and then make them writable
again after the migration is done? That way we would be segfaulting
early on instead of debugging tricky memory corruptions.

Andrey

0001-hw-qxl-cpr-support-preliminary.patch
Description:
Text Data
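A sketch of that proposal, assuming plain mprotect(2) on the preserved
mappings; the helper names are invented for illustration and are not
QEMU API. Steve's reply below addresses the cost of exactly this kind
of protection change.

/* cpr_guard.c: hypothetical helpers modeling the proposal above.
 * Protect a reused region while the new process re-creates devices,
 * then restore write access before the guest is resumed.
 * 'host' must be page-aligned, as RAM block mappings are. */
#include <stdio.h>
#include <sys/mman.h>

int cpr_guard_region(void *host, size_t size)
{
    /* Any write to [host, host + size) now faults early with SIGSEGV. */
    if (mprotect(host, size, PROT_READ) != 0) {
        perror("mprotect(PROT_READ)");
        return -1;
    }
    return 0;
}

int cpr_unguard_region(void *host, size_t size)
{
    /* Called once migration completes, before the guest runs again. */
    if (mprotect(host, size, PROT_READ | PROT_WRITE) != 0) {
        perror("mprotect(PROT_READ|PROT_WRITE)");
        return -1;
    }
    return 0;
}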
On 3/5/2025 11:50 AM, Andrey Drobyshev wrote:
> Regarding the reproducer: when I launch the buggy version with the same
> options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer,
> my VNC client silently hangs on the target after a while. Does that
> happen on your stand as well?

cpr does not preserve the vnc connection and session. To test, I specify
port 0 for the source VM and port 1 for the dest. When the src vnc goes
dormant, the dest vnc becomes active.

> Could you try launching the VM with "-nographic -device qxl-vga"? That
> way the VM's serial console is given to you directly in the shell, so
> when the qxl driver crashes you're still able to inspect the kernel
> messages.

I have been running like that, but have not reproduced the qxl driver
crash, and I suspect my guest image+kernel is too old. However, once I
realized the issue was post-cpr modification of qxl memory, I switched
my attention to the fix.
> As for your patch, I can report that it doesn't resolve the issue as it
> is. But I was able to track down another possible memory corruption
> using your approach with readonly mmap'ing:
> [...]
Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram
are definitely harmful. Try V2 of the patch, attached, which skips the
lines of init_qxl_ram that modify guest memory.

> I'm wondering, could it be useful to explicitly mark all the reused
> memory regions readonly upon cpr-transfer, and then make them writable
> again after the migration is done? That way we would be segfaulting
> early on instead of debugging tricky memory corruptions.

It's a useful debugging technique, but changing protection on a large
memory region can be too expensive for production due to TLB shootdowns.

Also, there are cases where writes are performed but the value is
guaranteed to be the same:

  qxl_post_load()
    qxl_set_mode()
      d->rom->mode = cpu_to_le32(modenr);

The value is the same because mode and shadow_rom.mode were passed in
vmstate from old qemu.

- Steve

0001-hw-qxl-cpr-support-preliminary-V2.patch
Description:
Text document
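The V2 patch is attached above rather than inlined. Purely as an
illustration of the idea it describes, a guard of that shape might look
as follows; the struct layout, the magic value, and the reused flag are
all assumptions for the sake of a compilable model, not the actual patch.

/* init_guard_model.c: compilable model of "skip the lines of
 * init_qxl_ram that modify guest memory" when the RAM block was
 * inherited via cpr.  Not the attached V2 patch. */
#include <stdbool.h>
#include <stdint.h>

#define QXL_RAM_MAGIC 0x4c585120u   /* placeholder, not the real constant */

struct qxl_ram_model {              /* stand-in for the real ring layout */
    uint32_t magic;
    uint32_t int_pending;
    uint32_t int_mask;
};

void init_qxl_ram_model(struct qxl_ram_model *ram, bool reused)
{
    if (reused) {
        /* After cpr-transfer the guest owns this memory and may be
         * using the rings right now: leave every byte untouched.
         * Values such as rom->mode are restored via vmstate instead,
         * where old and new contents are guaranteed equal. */
        return;
    }
    ram->magic = QXL_RAM_MAGIC;     /* cold boot: initialize as usual */
    ram->int_pending = 0;
    ram->int_mask = 0;
}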
On 3/5/25 22:19, Steven Sistare wrote:
> [...]
> It's a useful debugging technique, but changing protection on a large
> memory region can be too expensive for production due to TLB shootdowns.
+I suspect that, as cpr-transfer doesn't migrate the guest +memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM. However, I wasn't able to trace the +corruption so far. +Could somebody help the investigation and take a look into +this? Any +suggestions would be appreciated. Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr. A message is printed at migration start time. +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +email- +steven.sistare@oracle.com/ + +- Steve +With the traces enabled + the "migration: ram block cpr blockers" +patch +applied: + +Source: +cpr_find_fd pc.bios, id 0 returns -1 +cpr_save_fd pc.bios, id 0, fd 22 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +0x7fec18e00000 +cpr_find_fd pc.rom, id 0 returns -1 +cpr_save_fd pc.rom, id 0, fd 23 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +0x7fec18c00000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 24 host 0x7fec18a00000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 25 host 0x7feb77e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 27 host 0x7fec18800000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 28 host 0x7feb73c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 34 host 0x7fec18600000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 35 host 0x7fec18200000 +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +fd 36 host 0x7feb8b600000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +37 host 0x7feb8b400000 + +cpr_state_save cpr-transfer mode +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +cpr_transfer_input /var/run/alma8cpr-dst.sock +cpr_state_load cpr-transfer mode +cpr_find_fd pc.bios, id 0 returns 20 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +0x7fcdc9800000 +cpr_find_fd pc.rom, id 0 returns 19 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +0x7fcdc9600000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 18 host 0x7fcdc9400000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 17 host 0x7fcd27e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 
+qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192
+fd 16 host 0x7fcdc9200000
+cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15
+qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size
+67108864 fd 15 host 0x7fcd23c00000
+cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14
+qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536
+fd 14 host 0x7fcdc8800000
+cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13
+qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size
+2097152 fd 13 host 0x7fcdc8400000
+cpr_find_fd /rom@etc/table-loader, id 0 returns 11
+qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536
+fd 11 host 0x7fcdc8200000
+cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10
+qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd
+10 host 0x7fcd3be00000
+Looks like both vga.vram and qxl.vram are being preserved (with the same
+addresses), and no incompatible ram blocks are found during migration.
+Sorry, addresses are not the same, of course. However, the corresponding
+ram blocks do seem to be preserved and initialized.
+So far, I have not reproduced the guest driver failure.
+
+However, I have isolated the places where new QEMU improperly writes to
+the qxl memory regions prior to starting the guest, by mmap'ing them
+readonly after cpr:
+
+  qemu_ram_alloc_internal()
+    if (reused && (strstr(name, "qxl") || strstr(name, "vga")))
+        ram_flags |= RAM_READONLY;
+    new_block = qemu_ram_alloc_from_fd(...)
+
+I have attached a draft fix; try it and let me know.
+My console window looks fine before and after cpr, using
+-vnc $hostip:0 -vga qxl
+
+- Steve
+Regarding the reproducer: when I launch the buggy version with the same
+options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer,
+my VNC client silently hangs on the target after a while. Could that
+happen on your stand as well?
+cpr does not preserve the vnc connection and session. To test, I specify
+port 0 for the source VM and port 1 for the dest. When the src vnc goes
+dormant, the dest vnc becomes active.
+Could you try launching the VM with
+"-nographic -device qxl-vga"? That way the VM's serial console is given
+to you directly in the shell, so when the qxl driver crashes you're
+still able to inspect the kernel messages.
+I have been running like that, but have not reproduced the qxl driver
+crash, and I suspect my guest image+kernel is too old. However, once I
+realized the issue was post-cpr modification of qxl memory, I switched
+my attention to the fix.
+As for your patch, I can report that it doesn't resolve the issue as it
+is. But I was able to track down another possible memory corruption
+using your approach with readonly mmap'ing:
+Program terminated with signal SIGSEGV, Segmentation fault.
+#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412
+412        d->ram->magic      = cpu_to_le32(QXL_RAM_MAGIC);
+[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))]
+(gdb) bt
+#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412
+#1 0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70,
+errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142
+#2 0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70,
+errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257
+#3 0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70,
+errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174
+#4 0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70,
+value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494
+#5 0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70,
+v=0x5638996f3770, name=0x56389759b141 "realized",
+opaque=0x5638987893d0, errp=0x7ffd3c2b84e0)
+    at ../qom/object.c:2374
+#6 0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70,
+name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0)
+    at ../qom/object.c:1449
+#7 0x00005638970f8586 in object_property_set_qobject
+(obj=0x5638996e0e70, name=0x56389759b141 "realized",
+value=0x5638996df900, errp=0x7ffd3c2b84e0)
+    at ../qom/qom-qobject.c:28
+#8 0x00005638970f3d8d in object_property_set_bool
+(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true,
+errp=0x7ffd3c2b84e0)
+    at ../qom/object.c:1519
+#9 0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70,
+bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276
+#10 0x0000563896dba675 in qdev_device_add_from_qdict
+(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at
+../system/qdev-monitor.c:714
+#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150,
+errp=0x56389855dc40 <error_fatal>) at ../system/qdev-monitor.c:733
+#12 0x0000563896dc48f1 in device_init_func (opaque=0x0,
+opts=0x563898786150, errp=0x56389855dc40 <error_fatal>) at
+../system/vl.c:1207
+#13 0x000056389737a6cc in qemu_opts_foreach
+    (list=0x563898427b60 <qemu_device_opts>, func=0x563896dc48ca
+<device_init_func>, opaque=0x0, errp=0x56389855dc40 <error_fatal>)
+    at ../util/qemu-option.c:1135
+#14 0x0000563896dc89b5 in qemu_create_cli_devices () at
+../system/vl.c:2745
+#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40
+<error_fatal>) at ../system/vl.c:2806
+#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948)
+at ../system/vl.c:3838
+#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at
+../system/main.c:72
+So the attached adjusted version of your patch does seem to help. At
+least I can't reproduce the crash on my stand.
+Thanks for the stack trace; the calls to SPICE_RING_INIT in
+init_qxl_ram are definitely harmful. Try V2 of the patch, attached,
+which skips the lines of init_qxl_ram that modify guest memory.
+I'm wondering, could it be useful to explicitly mark all the reused
+memory regions readonly upon cpr-transfer, and then make them writable
+again after the migration is done? That way we would segfault early on
+instead of debugging tricky memory corruptions.
+It's a useful debugging technique, but changing protection on a large
+memory region can be too expensive for production due to TLB shootdowns.
+Good point. Though we could move this code under a non-default option
+to avoid the rewriting.
+
+Den
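+To make the V2 approach concrete, a minimal sketch of the kind of guard
+it applies in hw/display/qxl.c follows. This is only an illustration: it
+assumes a predicate such as cpr_is_incoming() is available to detect
+that the RAM blocks were inherited from the old QEMU, and the field list
+is abridged; the actual patch may be gated and structured differently.
+
+static void init_qxl_ram(PCIQXLDevice *d)
+{
+    /* Skip stores that would clobber live guest state preserved across
+     * cpr; the old QEMU already initialized these fields and the guest
+     * driver may be using them right now. */
+    if (!cpr_is_incoming()) {
+        d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC);
+        d->ram->int_pending = cpu_to_le32(0);
+        d->ram->int_mask    = cpu_to_le32(0);
+        SPICE_RING_INIT(&d->ram->cmd_ring);
+        SPICE_RING_INIT(&d->ram->cursor_ring);
+        SPICE_RING_INIT(&d->ram->release_ring);
+    }
+}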
+
+On 3/5/25 11:19 PM, Steven Sistare wrote:
+> [earlier thread trimmed]
+>> Regarding the reproducer: when I launch the buggy version with the
+>> same options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do
+>> cpr-transfer, my VNC client silently hangs on the target after a
+>> while. Could that happen on your stand as well?
+> cpr does not preserve the vnc connection and session. To test, I specify
+> port 0 for the source VM and port 1 for the dest. When the src vnc goes
+> dormant, the dest vnc becomes active.
+
+Sure, I meant that VNC on the dest (on port 1) works for a while after
+the migration and then hangs, apparently after the guest QXL crash.
+
+>> Could you try launching the VM with "-nographic -device qxl-vga"? That
+>> way the VM's serial console is given to you directly in the shell, so
+>> when the qxl driver crashes you're still able to inspect the kernel
+>> messages.
+> I have been running like that, but have not reproduced the qxl driver
+> crash, and I suspect my guest image+kernel is too old.
+
+Yes, that's probably the case. But the crash occurs on my Fedora 41
+guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to
+be buggy.
+
+> However, once I realized the issue was post-cpr modification of qxl
+> memory, I switched my attention to the fix.
+
+>> As for your patch, I can report that it doesn't resolve the issue as
+>> it is. But I was able to track down another possible memory corruption
+>> using your approach with readonly mmap'ing:
+>> [quoted gdb backtrace trimmed; see above]
+> Thanks for the stack trace; the calls to SPICE_RING_INIT in
+> init_qxl_ram are definitely harmful. Try V2 of the patch, attached,
+> which skips the lines of init_qxl_ram that modify guest memory.
+
+Thanks, your v2 patch does seem to prevent the crash. Would you re-send
+it to the list as a proper fix?
+
+>> I'm wondering, could it be useful to explicitly mark all the reused
+>> memory regions readonly upon cpr-transfer, and then make them writable
+>> again after the migration is done?
+> It's a useful debugging technique, but changing protection on a large
+> memory region can be too expensive for production due to TLB shootdowns.
+>
+> Also, there are cases where writes are performed but the value is
+> guaranteed to be the same:
+>   qxl_post_load()
+>     qxl_set_mode()
+>       d->rom->mode = cpu_to_le32(modenr);
+> The value is the same because mode and shadow_rom.mode were passed in
+> vmstate from old qemu.
+
+There are also cases where a device's ROM might be re-initialized. E.g.
+this segfault occurs upon further exploration of RO-mapped RAM blocks:
+
+> Program terminated with signal SIGSEGV, Segmentation fault.
+> #0 __memmove_avx_unaligned_erms () at
+> ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664
+> 664 rep movsb
+> [Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))]
+> (gdb) bt
+> #0 __memmove_avx_unaligned_erms () at
+> ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664
+> #1 0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380,
+> owner=0x55aa2019ac10, name=0x7fffb8272bc0 "/rom@etc/acpi/tables",
+> ro=true) at ../hw/core/loader.c:1032
+> #2 0x000055aa1d031577 in rom_add_blob
+> (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070,
+> len=131072, max_len=2097152, addr=18446744073709551615,
+> fw_file_name=0x55aa1da51f13 "etc/acpi/tables",
+> fw_callback=0x55aa1d441f59 <acpi_build_update>,
+> callback_opaque=0x55aa20ff0010, as=0x0, read_only=true) at
+> ../hw/core/loader.c:1147
+> #3 0x000055aa1cfd788d in acpi_add_rom_blob
+> (update=0x55aa1d441f59 <acpi_build_update>, opaque=0x55aa20ff0010,
+> blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at
+> ../hw/acpi/utils.c:46
+> #4 0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720
+> #5 0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050,
+> data=0x0) at ../hw/i386/pc.c:638
+> #6 0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10
+> <machine_init_done_notifiers>, data=0x0) at ../util/notify.c:39
+> #7 0x000055aa1d039ee5 in qdev_machine_creation_done () at
+> ../hw/core/machine.c:1749
+> #8 0x000055aa1d2c7b3e in qemu_machine_creation_done
+> (errp=0x55aa1ea5cc40 <error_fatal>) at ../system/vl.c:2779
+> #9 0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40
+> <error_fatal>) at ../system/vl.c:2807
+> #10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at
+> ../system/vl.c:3838
+> #11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at
+> ../system/main.c:72
+
+I'm not sure whether the ACPI tables ROM in particular is rewritten with
+the same content, but there might be cases where a ROM is read from the
+file system upon initialization. That is undesirable, as the guest
+kernel certainly won't be too happy about a sudden change of the
+device's ROM content.
+
+So the issue we're dealing with here is any unwanted memory-related
+device initialization upon cpr.
+
+For now the only thing that comes to my mind is to make a test where we
+put as many devices as we can into a VM, make ram blocks RO upon cpr
+(and remap them as RW later after migration is done, if needed), and
+catch any unwanted memory violations. As Den suggested, we might
+consider adding that behaviour as a separate non-default option (or a
+"migrate" command flag specific to cpr-transfer), which would only be
+used in testing.
+
+Andrey
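+A minimal sketch of the debug-only protection toggle proposed above,
+assuming the preserved RAMBlock's host address and length are at hand;
+the helper name is hypothetical, and production use would sit behind a
+non-default option because of the TLB-shootdown cost mentioned earlier.
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+/* Map a cpr-preserved block read-only so any stray device-init write
+ * faults immediately; flip it back to read/write once migration is
+ * done. host and len must be page-aligned, as RAM block mappings are. */
+static void cpr_debug_set_readonly(void *host, size_t len, bool readonly)
+{
+    int prot = readonly ? PROT_READ : (PROT_READ | PROT_WRITE);
+
+    if (mprotect(host, len, prot) != 0) {
+        perror("mprotect");
+    }
+}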
+
+On 3/6/25 16:16, Andrey Drobyshev wrote:
+> [earlier thread trimmed]
+> I'm not sure whether the ACPI tables ROM in particular is rewritten
+> with the same content, but there might be cases where a ROM is read
+> from the file system upon initialization.
+
+No way. ACPI with the source must be used in the same way as BIOSes
+and optional ROMs.
+
+Den
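+For reference, the qxl_set_mode() store quoted earlier is an example of
+a benign same-value write that a read-only debug mapping would still
+flag. One way such a false positive could be silenced, shown purely as a
+hypothetical shape and not the actual code, is to make the store
+conditional:
+
+/* Only touch guest-visible ROM when the value actually changes, so a
+ * read-only cpr debug mapping does not fault on a same-value store. */
+if (le32_to_cpu(d->rom->mode) != modenr) {
+    d->rom->mode = cpu_to_le32(modenr);
+}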
+On 3/6/2025 10:52 AM, Denis V. Lunev wrote:
+> On 3/6/25 16:16, Andrey Drobyshev wrote:
+>> [earlier thread trimmed]
+>> Thanks, your v2 patch does seem to prevent the crash. Would you
+>> re-send it to the list as a proper fix?
+
+Yes. Was waiting for your confirmation.
+
+>> For now the only thing that comes to my mind is to make a test where
+>> we put as many devices as we can into a VM, make ram blocks RO upon
+>> cpr (and remap them as RW later after migration is done, if needed),
+>> and catch any unwanted memory violations. As Den suggested, we might
+>> consider adding that behaviour as a separate non-default option (or a
+>> "migrate" command flag specific to cpr-transfer), which would only be
+>> used in testing.
+
+I'll look into adding an option, but there may be too many false
+positives, such as the qxl_set_mode case above. And the maintainers may
+object to me eliminating the false positives by adding more CPR_IN
+tests, due to gratuitous (from their POV) ugliness.
+
+But I will use the technique to look for more write violations.
+
+> No way. ACPI with the source must be used in the same way as BIOSes
+> and optional ROMs.
+Yup, it's a bug. Will fix.
+
+- Steve
+
+see
+https://lore.kernel.org/qemu-devel/1741380954-341079-1-git-send-email-steven.sistare@oracle.com/
+
+- Steve
+
+On 3/6/2025 11:13 AM, Steven Sistare wrote:
+On 3/6/2025 10:52 AM, Denis V. Lunev wrote:
+On 3/6/25 16:16, Andrey Drobyshev wrote:
+On 3/5/25 11:19 PM, Steven Sistare wrote:
+On 3/5/2025 11:50 AM, Andrey Drobyshev wrote:
+On 3/4/25 9:05 PM, Steven Sistare wrote:
+On 2/28/2025 1:37 PM, Andrey Drobyshev wrote:
+On 2/28/25 8:35 PM, Andrey Drobyshev wrote:
+On 2/28/25 8:20 PM, Steven Sistare wrote:
+On 2/28/2025 1:13 PM, Steven Sistare wrote:
+On 2/28/2025 12:39 PM, Andrey Drobyshev wrote:
+Hi all,
+
+We've been experimenting with cpr-transfer migration mode recently and
+have discovered the following issue with the guest QXL driver:
+
+Run migration source:
+EMULATOR=/path/to/emulator
+ROOTFS=/path/to/image
+QMPSOCK=/var/run/alma8qmp-src.sock
+
+$EMULATOR -enable-kvm \
+       -machine q35 \
+       -cpu host -smp 2 -m 2G \
+       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on \
+       -machine memory-backend=ram0 \
+       -machine aux-ram-share=on \
+       -drive file=$ROOTFS,media=disk,if=virtio \
+       -qmp unix:$QMPSOCK,server=on,wait=off \
+       -nographic \
+       -device qxl-vga
+
+Run migration target:
+EMULATOR=/path/to/emulator
+ROOTFS=/path/to/image
+QMPSOCK=/var/run/alma8qmp-dst.sock
+
+$EMULATOR -enable-kvm \
+       -machine q35 \
+       -cpu host -smp 2 -m 2G \
+       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on \
+       -machine memory-backend=ram0 \
+       -machine aux-ram-share=on \
+       -drive file=$ROOTFS,media=disk,if=virtio \
+       -qmp unix:$QMPSOCK,server=on,wait=off \
+       -nographic \
+       -device qxl-vga \
+       -incoming tcp:0:44444 \
+       -incoming '{"channel-type": "cpr", "addr": { "transport":
+"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}'
+
+Launch the migration:
+QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell
+QMPSOCK=/var/run/alma8qmp-src.sock
+
+$QMPSHELL -p $QMPSOCK <<EOF
+       migrate-set-parameters mode=cpr-transfer
+       migrate channels=[{"channel-type":"main","addr":
+{"transport":"socket","type":"inet","host":"0","port":"44444"}},
+{"channel-type":"cpr","addr":
+{"transport":"socket","type":"unix","path":"/var/run/alma8cpr-dst.sock"}}]
+EOF
+
+Then, after a while, the QXL guest driver on the target crashes, spewing
+the following messages:
+[  73.962002] [TTM] Buffer eviction failed
+[  73.962072] qxl 0000:00:02.0: object_init failed for (3149824,
+0x00000001)
+[  73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to
+allocate VRAM BO
+That seems to be a known kernel QXL driver bug:
+https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/
+https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/
+(the latter discussion contains a reproduce script which speeds up
+the crash in the guest):
+#!/bin/bash
+
+chvt 3
+
+for j in $(seq 80); do
+           echo "$(date) starting round $j"
+           if [ "$(journalctl --boot | grep "failed to allocate VRAM BO")" != "" ]; then
+                   echo "bug was reproduced after $j tries"
+                   exit 1
+           fi
+           for i in $(seq 100); do
+                   dmesg > /dev/tty3
+           done
+done
+
+echo "bug could not be reproduced"
+exit 0
+The bug itself seems to remain unfixed, as I was able to reproduce
+that with
Fedora 41 guest, as well as AlmaLinux 8 guest. However, our
+cpr-transfer code also seems to be buggy, as it triggers the crash -
+without the cpr-transfer migration the above reproduce doesn't
+lead to a crash on the source VM.
+
+I suspect that, as cpr-transfer doesn't migrate the guest
+memory, but
+rather passes it through the memory backend object, our code might
+somehow corrupt the VRAM. However, I wasn't able to trace the
+corruption so far.
+
+Could somebody help the investigation and take a look into
+this? Any
+suggestions would be appreciated. Thanks!
+Possibly some memory region created by qxl is not being preserved.
+Try adding these traces to see what is preserved:
+
+-trace enable='*cpr*'
+-trace enable='*ram_alloc*'
+Also try adding this patch to see if it flags any ram blocks as not
+compatible with cpr. A message is printed at migration start time.
+https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send-email-steven.sistare@oracle.com/
+
+- Steve
+With the traces enabled + the "migration: ram block cpr blockers"
+patch
+applied:
+
+Source:
+cpr_find_fd pc.bios, id 0 returns -1
+cpr_save_fd pc.bios, id 0, fd 22
+qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host
+0x7fec18e00000
+cpr_find_fd pc.rom, id 0 returns -1
+cpr_save_fd pc.rom, id 0, fd 23
+qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host
+0x7fec18c00000
+cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1
+cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24
+qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size
+262144 fd 24 host 0x7fec18a00000
+cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1
+cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25
+qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size
+67108864 fd 25 host 0x7feb77e00000
+cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1
+cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27
+qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192
+fd 27 host 0x7fec18800000
+cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1
+cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28
+qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size
+67108864 fd 28 host 0x7feb73c00000
+cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1
+cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34
+qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536
+fd 34 host 0x7fec18600000
+cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1
+cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35
+qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size
+2097152 fd 35 host 0x7fec18200000
+cpr_find_fd /rom@etc/table-loader, id 0 returns -1
+cpr_save_fd /rom@etc/table-loader, id 0, fd 36
+qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536
+fd 36 host 0x7feb8b600000
+cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1
+cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37
+qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd
+37 host 0x7feb8b400000
+
+cpr_state_save cpr-transfer mode
+cpr_transfer_output /var/run/alma8cpr-dst.sock
+Target:
+cpr_transfer_input /var/run/alma8cpr-dst.sock
+cpr_state_load cpr-transfer mode
+cpr_find_fd pc.bios, id 0 returns 20
+qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host
+0x7fcdc9800000
+cpr_find_fd pc.rom, id 0 returns 19
+qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host
+0x7fcdc9600000
+cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18
+qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size
+262144 fd 18 host
0x7fcdc9400000
+cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17
+qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size
+67108864 fd 17 host 0x7fcd27e00000
+cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16
+qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192
+fd 16 host 0x7fcdc9200000
+cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15
+qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size
+67108864 fd 15 host 0x7fcd23c00000
+cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14
+qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536
+fd 14 host 0x7fcdc8800000
+cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13
+qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size
+2097152 fd 13 host 0x7fcdc8400000
+cpr_find_fd /rom@etc/table-loader, id 0 returns 11
+qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536
+fd 11 host 0x7fcdc8200000
+cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10
+qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd
+10 host 0x7fcd3be00000
+Looks like both vga.vram and qxl.vram are being preserved (with the
+same addresses), and no incompatible ram blocks are found during migration.
+Sorry, the addresses are not the same, of course. However, the
+corresponding ram blocks do seem to be preserved and initialized.
+So far, I have not reproduced the guest driver failure.
+
+However, I have isolated places where new QEMU improperly writes to
+the qxl memory regions prior to starting the guest, by mmap'ing them
+readonly after cpr:
+
+   qemu_ram_alloc_internal()
+     if (reused && (strstr(name, "qxl") || strstr(name, "vga")))
+         ram_flags |= RAM_READONLY;
+     new_block = qemu_ram_alloc_from_fd(...)
+
+I have attached a draft fix; try it and let me know.
+My console window looks fine before and after cpr, using
+-vnc $hostip:0 -vga qxl
+
+- Steve
+Regarding the reproduce: when I launch the buggy version with the same
+options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer,
+my VNC client silently hangs on the target after a while. Could it
+happen on your stand as well?
+cpr does not preserve the vnc connection and session. To test, I specify
+port 0 for the source VM and port 1 for the dest. When the src vnc goes
+dormant the dest vnc becomes active.
+Sure, I meant that VNC on the dest (on port 1) works for a while
+after the migration and then hangs, apparently after the guest QXL crash.
+Could you try launching the VM with
+"-nographic -device qxl-vga"? That way the VM's serial console is
+available directly in the shell, so when the qxl driver crashes you're
+still able to inspect the kernel messages.
+I have been running like that, but have not reproduced the qxl driver
+crash, and I suspect my guest image+kernel is too old.
+Yes, that's probably the case. But the crash occurs on my Fedora 41
+guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to
+be buggy.
+However, once I realized the issue was post-cpr modification of qxl
+memory, I switched my attention to the fix.
+As for your patch, I can report that it doesn't resolve the issue as it
+is. But I was able to track down another possible memory corruption
+using your approach with readonly mmap'ing:
+#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +412        d->ram->magic      = cpu_to_le32(QXL_RAM_MAGIC); +[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +(gdb) bt +#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +#1 0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +#2 0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +#3 0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +#4 0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, +value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +#5 0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, +v=0x5638996f3770, name=0x56389759b141 "realized", +opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) +     at ../qom/object.c:2374 +#6 0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, +name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) +     at ../qom/object.c:1449 +#7 0x00005638970f8586 in object_property_set_qobject +(obj=0x5638996e0e70, name=0x56389759b141 "realized", +value=0x5638996df900, errp=0x7ffd3c2b84e0) +     at ../qom/qom-qobject.c:28 +#8 0x00005638970f3d8d in object_property_set_bool +(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, +errp=0x7ffd3c2b84e0) +     at ../qom/object.c:1519 +#9 0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, +bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +#10 0x0000563896dba675 in qdev_device_add_from_qdict +(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ +system/qdev-monitor.c:714 +#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, +errp=0x56389855dc40 <error_fatal>) at ../system/qdev-monitor.c:733 +#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, +opts=0x563898786150, errp=0x56389855dc40 <error_fatal>) at ../system/ +vl.c:1207 +#13 0x000056389737a6cc in qemu_opts_foreach +     (list=0x563898427b60 <qemu_device_opts>, func=0x563896dc48ca +<device_init_func>, opaque=0x0, errp=0x56389855dc40 <error_fatal>) +     at ../util/qemu-option.c:1135 +#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ +vl.c:2745 +#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +<error_fatal>) at ../system/vl.c:2806 +#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) +at ../system/vl.c:3838 +#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ +system/main.c:72 +So the attached adjusted version of your patch does seem to help. At +least I can't reproduce the crash on my stand. +Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram +are +definitely harmful. Try V2 of the patch, attached, which skips the lines +of init_qxl_ram that modify guest memory. +Thanks, your v2 patch does seem to prevent the crash. Would you re-send +it to the list as a proper fix? +Yes. Was waiting for your confirmation. +I'm wondering, could it be useful to explicitly mark all the reused +memory regions readonly upon cpr-transfer, and then make them writable +back again after the migration is done? That way we will be segfaulting +early on instead of debugging tricky memory corruptions. +It's a useful debugging technique, but changing protection on a large +memory region +can be too expensive for production due to TLB shootdowns. 
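+
+For reference, the v2 approach described a few lines up (skip the parts
+of init_qxl_ram that touch guest memory) can be sketched roughly as
+below; cpr_is_incoming() stands in for whatever predicate the actual
+patch uses to detect a cpr-preserved instance:
+
+static void init_qxl_ram(PCIQXLDevice *d)
+{
+    /* Host-side pointer setup stays unconditional. */
+    d->ram = (QXLRam *)(d->vga.vram_ptr +
+                        le32_to_cpu(d->shadow_rom.ram_header_offset));
+
+    if (cpr_is_incoming()) {
+        /* Guest RAM was preserved across cpr-transfer and was already
+         * initialized by old QEMU; redoing SPICE_RING_INIT and the
+         * stores below would corrupt live guest state. */
+        return;
+    }
+
+    d->ram->magic = cpu_to_le32(QXL_RAM_MAGIC);
+    /* ... ring and offset initialization as before ... */
+}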
+
+Also, there are cases where writes are performed but the value is
+guaranteed to be the same:
+  qxl_post_load()
+    qxl_set_mode()
+      d->rom->mode = cpu_to_le32(modenr);
+The value is the same because mode and shadow_rom.mode were passed in
+vmstate from old qemu.
+There are also cases where a device's ROM might be re-initialized. E.g.
+this segfault occurs upon further exploration of RO-mapped RAM blocks:
+Program terminated with signal SIGSEGV, Segmentation fault.
+#0 __memmove_avx_unaligned_erms () at
+../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664
+664            rep    movsb
+[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))]
+(gdb) bt
+#0 __memmove_avx_unaligned_erms () at
+../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664
+#1 0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, owner=0x55aa2019ac10,
+name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true)
+    at ../hw/core/loader.c:1032
+#2 0x000055aa1d031577 in rom_add_blob
+    (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, max_len=2097152,
+addr=18446744073709551615, fw_file_name=0x55aa1da51f13 "etc/acpi/tables",
+fw_callback=0x55aa1d441f59 <acpi_build_update>, callback_opaque=0x55aa20ff0010, as=0x0,
+read_only=true) at ../hw/core/loader.c:1147
+#3 0x000055aa1cfd788d in acpi_add_rom_blob
+    (update=0x55aa1d441f59 <acpi_build_update>, opaque=0x55aa20ff0010,
+blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at ../hw/acpi/utils.c:46
+#4 0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720
+#5 0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0)
+at ../hw/i386/pc.c:638
+#6 0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10
+<machine_init_done_notifiers>, data=0x0) at ../util/notify.c:39
+#7 0x000055aa1d039ee5 in qdev_machine_creation_done () at
+../hw/core/machine.c:1749
+#8 0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40
+<error_fatal>) at ../system/vl.c:2779
+#9 0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40
+<error_fatal>) at ../system/vl.c:2807
+#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at
+../system/vl.c:3838
+#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at
+../system/main.c:72
+I'm not sure whether the ACPI tables ROM in particular is rewritten with
+the same content, but there might be cases where a ROM is re-read from
+the file system upon initialization. That is undesirable, as the guest
+kernel certainly won't be too happy about a sudden change of the
+device's ROM content.
+
+So the issue we're dealing with here is any unwanted memory-related
+device initialization upon cpr.
+
+For now the only thing that comes to my mind is to make a test where we
+put as many devices as we can into a VM, make ram blocks RO upon cpr
+(and remap them as RW later after migration is done, if needed), and
+catch any unwanted memory violations. As Den suggested, we might
+consider adding that behaviour as a separate non-default option (or a
+"migrate" command flag specific to cpr-transfer), which would only be
+used in testing.
+I'll look into adding an option, but there may be too many false positives,
+such as the qxl_set_mode case above. And the maintainers may object to me
+eliminating the false positives by adding more CPR_IN tests, due to gratuitous
+(from their POV) ugliness.
+
+But I will use the technique to look for more write violations.
+Andrey
+No way. ACPI with the source must be used in the same way as BIOSes
+and optional ROMs.
+Yup, it's a bug. Will fix.
+
+- Steve
+
diff --git a/results/classifier/012/kernel virtual machine/60339453 b/results/classifier/012/kernel virtual machine/60339453
new file mode 100644
index 00000000..4eb978a6
--- /dev/null
+++ b/results/classifier/012/kernel virtual machine/60339453
@@ -0,0 +1,79 @@
+kernel virtual machine: 0.813
+register: 0.787
+boot: 0.782
+arm: 0.780
+other: 0.776
+performance: 0.764
+permissions: 0.750
+TCG: 0.748
+risc-v: 0.707
+device: 0.706
+mistranslation: 0.699
+PID: 0.685
+network: 0.682
+vnc: 0.680
+debug: 0.672
+graphic: 0.671
+semantic: 0.662
+architecture: 0.649
+x86: 0.647
+files: 0.623
+socket: 0.607
+assembly: 0.486
+
+[BUG] scsi: vmw_pvscsi: Boot hangs during scsi under qemu, post commit e662502b3a78
+
+Hi,
+
+Commit e662502b3a78 ("scsi: vmw_pvscsi: Set correct residual data length"),
+and its backports to stable trees, makes the kernel hang during boot when
+run as a VM under qemu with the following parameters:
+
+ -drive file=$DISKFILE,if=none,id=sda
+ -device pvscsi
+ -device scsi-hd,bus=scsi.0,drive=sda
+
+Diving deeper, commit e662502b3a78
+
+ @@ -585,7 +585,13 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
+ case BTSTAT_SUCCESS:
+ + /*
+ + * Commands like INQUIRY may transfer less data than
+ + * requested by the initiator via bufflen. Set residual
+ + * count to make upper layer aware of the actual amount
+ + * of data returned.
+ + */
+ + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+
+assumes 'e->dataLen' is properly armed with the actual number of bytes
+transferred; alas, qemu's hw/scsi/vmw_pvscsi.c never arms the 'dataLen'
+field of the completion descriptor (it is left zero).
+
+As a result, the residual count is set to the *entire* 'scsi_bufflen' of a
+good transfer, which makes the upper SCSI layers repeatedly ignore this
+valid transfer.
+
+Not properly arming 'dataLen' seems to be an oversight in qemu, which
+needs to be fixed.
+
+However, since kernels with commit e662502b3a78 (and backports) now fail
+to boot under qemu's "-device pvscsi", a suggested workaround is to set
+the residual count *only* if 'e->dataLen' is armed, e.g.:
+
+ @@ -588,7 +588,8 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
+ * count to make upper layer aware of the actual amount
+ * of data returned.
+ */
+ - scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+ + if (e->dataLen)
+ + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+
+in order to make kernels boot on old qemu binaries.
+
+Best,
+Shmulik
+
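+A minimal sketch of the workaround's intent (standalone C with
+hypothetical names; the real change lives in the kernel's
+pvscsi_complete_request()): trust 'dataLen' only when the device
+actually armed it, so older QEMUs that leave the field zero are
+treated as full transfers instead of being discarded.
+
+#include <stdint.h>
+
+/* Residual bytes for a completed request: bufflen is what the
+ * initiator requested, data_len is what the device says it
+ * transferred.  data_len == 0 means "not armed" on old QEMU,
+ * so report no residual rather than the whole buffer. */
+static inline uint32_t pvscsi_residual(uint32_t bufflen, uint32_t data_len)
+{
+    return data_len ? bufflen - data_len : 0;
+}
+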
diff --git a/results/classifier/012/kernel virtual machine/71456293 b/results/classifier/012/kernel virtual machine/71456293
new file mode 100644
index 00000000..bc756aac
--- /dev/null
+++ b/results/classifier/012/kernel virtual machine/71456293
@@ -0,0 +1,1504 @@
+kernel virtual machine: 0.715
+mistranslation: 0.659
+TCG: 0.642
+x86: 0.637
+vnc: 0.625
+risc-v: 0.621
+debug: 0.620
+PID: 0.614
+permissions: 0.613
+register: 0.609
+graphic: 0.603
+assembly: 0.602
+device: 0.601
+semantic: 0.600
+other: 0.598
+arm: 0.598
+boot: 0.598
+socket: 0.596
+architecture: 0.594
+performance: 0.594
+files: 0.592
+network: 0.491
+
+[Qemu-devel][bug] qemu crash when migrate vm and vm's disks
+
+When migrating a vm and the vm's disks, the target host qemu crashes due
+to an invalid free.
+
+#0 object_unref (obj=0x1000) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
+#1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
+at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
+#2 flatview_destroy (view=0x560439653880) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
+#3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
+at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
+#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
+#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
+
+Tested against qemu-2.12.0, but the latest qemu (v6.0.0-rc2) also
+reproduces it.
+The following patch can resolve this problem:
+https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+Steps to reproduce:
+(1) Create VM (virsh define)
+(2) Add 64 virtio scsi disks
+(3) migrate vm and vm's disks
+-------------------------------------------------------------------------------------------------------------------------------------
+This e-mail and its attachments contain confidential information from New H3C, which is
+intended only for the person or entity whose address is listed above. Any use of the
+information contained herein in any way (including, but not limited to, total or partial
+disclosure, reproduction, or dissemination) by persons other than the intended
+recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender
+by phone or email immediately and delete it!
+
+* Yuchen (yu.chen@h3c.com) wrote:
+> When migrating a vm and the vm's disks, the target host qemu crashes
+> due to an invalid free.
+>
+> #0 object_unref (obj=0x1000) at
+> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
+> #1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
+> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
+> #2 flatview_destroy (view=0x560439653880) at
+> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
+> #3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
+> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
+> #4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
+> #5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
+>
+> Tested against qemu-2.12.0, but the latest qemu (v6.0.0-rc2) also
+> reproduces it.
+
+Interesting.
+
+> The following patch can resolve this problem:
+> https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+
+That's a pci/rcu change; ccing Paolo and Michael.
+
+> Steps to reproduce:
+> (1) Create VM (virsh define)
+> (2) Add 64 virtio scsi disks
+
+Is that hot adding the disks later, or are they included in the VM at
+creation?
+Can you provide a libvirt XML example?
+
+> (3) migrate vm and vm's disks
+
+What do you mean by 'and vm disks' - are you doing a block migration?
+
+Dave
+
+--
+Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
+
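+For context, flatview_destroy() runs on the RCU thread and drops one
+reference per region the flat view maps; the shape below is paraphrased
+and shortened from QEMU's memory.c, not verbatim. It shows why a
+MemoryRegion freed without this reference being accounted for turns
+the unref into an invalid free such as obj=0x1000 above:
+
+static void flatview_destroy(FlatView *view)
+{
+    int i;
+
+    /* Deferred via call_rcu(), so it runs after all readers finish. */
+    for (i = 0; i < view->nr; i++) {
+        memory_region_unref(view->ranges[i].mr);   /* crash site above */
+    }
+    g_free(view->ranges);
+    g_free(view);
+}
+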
+> -----Original Message-----
+> From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com]
+> Sent: 2021-04-08 19:27
+> To: yuchen (Cloud) <yu.chen@h3c.com>; pbonzini@redhat.com;
+> mst@redhat.com
+> Cc: qemu-devel@nongnu.org
+> Subject: Re: [Qemu-devel][bug] qemu crash when migrate vm and vm's disks
+>
+> * Yuchen (yu.chen@h3c.com) wrote:
+> > When migrating a vm and the vm's disks, the target host qemu crashes
+> > due to an invalid free.
+> >
+> > #0 object_unref (obj=0x1000) at
+> > /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
+> > #1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
+> > at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
+> > #2 flatview_destroy (view=0x560439653880) at
+> > /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
+> > #3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
+> > at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
+> > #4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
+> > #5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
+> >
+> > Tested against qemu-2.12.0, but the latest qemu (v6.0.0-rc2) also
+> > reproduces it.
+>
+> Interesting.
+>
+> > The following patch can resolve this problem:
+> > https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+>
+> That's a pci/rcu change; ccing Paolo and Michael.
+>
+> > Steps to reproduce:
+> > (1) Create VM (virsh define)
+> > (2) Add 64 virtio scsi disks
+>
+> Is that hot adding the disks later, or are they included in the VM at
+> creation?
+> Can you provide a libvirt XML example?
+> +Include disks in the VM at creation + +vm disks xml (only virtio scsi disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='sda' bus='scsi'/> + <address type='drive' controller='2' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='sdb' bus='scsi'/> + <address type='drive' controller='3' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='sdc' bus='scsi'/> + <address type='drive' controller='4' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='sdd' bus='scsi'/> + <address type='drive' controller='5' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='sde' bus='scsi'/> + <address type='drive' controller='6' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='sdf' bus='scsi'/> + <address type='drive' controller='7' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='sdg' bus='scsi'/> + <address type='drive' controller='8' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='sdh' bus='scsi'/> + <address type='drive' controller='9' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='sdi' bus='scsi'/> + <address type='drive' controller='10' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='sdj' bus='scsi'/> + <address type='drive' controller='11' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='sdk' bus='scsi'/> + <address type='drive' controller='12' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='sdl' bus='scsi'/> + <address 
type='drive' controller='13' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='sdm' bus='scsi'/> + <address type='drive' controller='14' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='sdn' bus='scsi'/> + <address type='drive' controller='15' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='sdo' bus='scsi'/> + <address type='drive' controller='16' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='sdp' bus='scsi'/> + <address type='drive' controller='17' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='sdq' bus='scsi'/> + <address type='drive' controller='18' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='sdr' bus='scsi'/> + <address type='drive' controller='19' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='sds' bus='scsi'/> + <address type='drive' controller='20' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='sdt' bus='scsi'/> + <address type='drive' controller='21' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='sdu' bus='scsi'/> + <address type='drive' controller='22' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='sdv' bus='scsi'/> + <address type='drive' controller='23' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='sdw' bus='scsi'/> + <address type='drive' controller='24' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='sdx' bus='scsi'/> + <address type='drive' controller='25' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='sdy' bus='scsi'/> + <address 
type='drive' controller='26' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='sdz' bus='scsi'/> + <address type='drive' controller='27' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='sdaa' bus='scsi'/> + <address type='drive' controller='28' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='sdab' bus='scsi'/> + <address type='drive' controller='29' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='sdac' bus='scsi'/> + <address type='drive' controller='30' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='sdad' bus='scsi'/> + <address type='drive' controller='31' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='sdae' bus='scsi'/> + <address type='drive' controller='32' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='sdaf' bus='scsi'/> + <address type='drive' controller='33' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='sdag' bus='scsi'/> + <address type='drive' controller='34' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='sdah' bus='scsi'/> + <address type='drive' controller='35' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='sdai' bus='scsi'/> + <address type='drive' controller='36' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='sdaj' bus='scsi'/> + <address type='drive' controller='37' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='sdak' bus='scsi'/> + <address type='drive' controller='38' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='sdal' bus='scsi'/> + 
<address type='drive' controller='39' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='sdam' bus='scsi'/> + <address type='drive' controller='40' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='sdan' bus='scsi'/> + <address type='drive' controller='41' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='sdao' bus='scsi'/> + <address type='drive' controller='42' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='sdap' bus='scsi'/> + <address type='drive' controller='43' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data43'/> + <target dev='sdaq' bus='scsi'/> + <address type='drive' controller='44' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='sdar' bus='scsi'/> + <address type='drive' controller='45' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='sdas' bus='scsi'/> + <address type='drive' controller='46' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='sdat' bus='scsi'/> + <address type='drive' controller='47' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='sdau' bus='scsi'/> + <address type='drive' controller='48' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='sdav' bus='scsi'/> + <address type='drive' controller='49' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='sdaw' bus='scsi'/> + <address type='drive' controller='50' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='sdax' bus='scsi'/> + <address type='drive' controller='51' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='sday' 
bus='scsi'/> + <address type='drive' controller='52' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='sdaz' bus='scsi'/> + <address type='drive' controller='53' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='sdba' bus='scsi'/> + <address type='drive' controller='54' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='sdbb' bus='scsi'/> + <address type='drive' controller='55' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='sdbc' bus='scsi'/> + <address type='drive' controller='56' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='sdbd' bus='scsi'/> + <address type='drive' controller='57' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='sdbe' bus='scsi'/> + <address type='drive' controller='58' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='sdbf' bus='scsi'/> + <address type='drive' controller='59' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='sdbg' bus='scsi'/> + <address type='drive' controller='60' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='sdbh' bus='scsi'/> + <address type='drive' controller='61' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='sdbi' bus='scsi'/> + <address type='drive' controller='62' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='sdbj' bus='scsi'/> + <address type='drive' controller='63' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='sdbk' bus='scsi'/> + <address type='drive' controller='64' bus='0' target='0' unit='0'/> + </disk> + <controller type='scsi' index='0'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='1' 
model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='2' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='3' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='4' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='5' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='6' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='7' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='8' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='9' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='10' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='11' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='12' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='13' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='14' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='15' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='16' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='17' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='18' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='19' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='20' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='21' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='22' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='23' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='24' model='virtio-scsi'> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='25' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='26' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='27' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='28' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='29' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </controller> + <controller type='scsi' index='30' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </controller> + <controller type='scsi' index='31' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='32' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='33' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='34' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='35' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='36' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='37' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='38' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='39' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='40' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='41' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='42' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='43' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='44' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='45' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='46' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='47' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' 
slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='48' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='49' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='50' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='51' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='52' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='53' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='54' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='55' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='56' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='57' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='58' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='59' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='60' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='61' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='62' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='63' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='64' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <controller type='pci' index='1' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' +function='0x0'/> + </controller> + <controller type='pci' index='2' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='2'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1f' +function='0x0'/> + </controller> + </devices> + +vm disks xml (only virtio disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' 
+discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='vdb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='vdc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='vdd' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='vde' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='vdf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='vdg' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='vdh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='vdi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='vdj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='vdk' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='vdl' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='vdm' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='vdn' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x15' 
+function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='vdo' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='vdp' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='vdq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='vdr' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='vds' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='vdt' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='vdu' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='vdv' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='vdw' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='vdx' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='vdy' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='vdz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' 
+discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='vdaa' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='vdab' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='vdac' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='vdad' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='vdae' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='vdaf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='vdag' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='vdah' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='vdai' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='vdaj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='vdak' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='vdal' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='vdam' bus='virtio'/> + <address type='pci' domain='0x0000' 
bus='0x01' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='vdan' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='vdao' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='vdap' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data43'/> + <target dev='vdaq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='vdar' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='vdas' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='vdat' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='vdau' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='vdav' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='vdaw' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='vdax' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='vday' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' 
cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='vdaz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='vdba' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='vdbb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='vdbc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='vdbd' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='vdbe' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='vdbf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='vdbg' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='vdbh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='vdbi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='vdbj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='vdbk' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='vdbl' bus='virtio'/> + 
<address type='pci' domain='0x0000' bus='0x00' slot='0x03'
+function='0x0'/>
+ </disk>
+ <controller type='pci' index='0' model='pci-root'/>
+ <controller type='pci' index='1' model='pci-bridge'>
+ <model name='pci-bridge'/>
+ <target chassisNr='1'/>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x1f'
+function='0x0'/>
+ </controller>
+ <controller type='pci' index='2' model='pci-bridge'>
+ <model name='pci-bridge'/>
+ <target chassisNr='2'/>
+ <address type='pci' domain='0x0000' bus='0x01' slot='0x1f'
+function='0x0'/>
+ </controller>
+ </devices>
+
+>> (3) migrate vm and vm disks
+>
+> What do you mean by 'and vm disks' - are you doing a block migration?
+
+Yes, block migration.
+In fact, migrating only the domain (without the disks) also reproduces it.
+
+> Dave
+>
+> ----------------------------------------------------------------------
+> ---------------------------------------------------------------
+> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
+-------------------------------------------------------------------------------------------------------------------------------------
+This e-mail and its attachments contain confidential information from New H3C,
+which is intended only for the person or entity whose address is listed above.
+Any use of the information contained herein in any way (including, but not
+limited to, total or partial disclosure, reproduction, or dissemination) by
+persons other than the intended recipient(s) is prohibited. If you receive
+this e-mail in error, please notify the sender by phone or email immediately
+and delete it!
+
diff --git a/results/classifier/012/kernel virtual machine/80615920 b/results/classifier/012/kernel virtual machine/80615920
new file mode 100644
index 00000000..b07bc360
--- /dev/null
+++ b/results/classifier/012/kernel virtual machine/80615920
@@ -0,0 +1,366 @@
+kernel virtual machine: 0.812
+risc-v: 0.809
+mistranslation: 0.800
+other: 0.786
+TCG: 0.785
+x86: 0.779
+vnc: 0.768
+performance: 0.758
+permissions: 0.758
+register: 0.756
+architecture: 0.755
+files: 0.751
+boot: 0.750
+device: 0.748
+assembly: 0.747
+debug: 0.746
+arm: 0.744
+semantic: 0.737
+network: 0.732
+socket: 0.732
+graphic: 0.730
+PID: 0.727
+
+[BUG] accel/tcg: cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu)
+
+It seems there is a bug in SIGALRM handling when a 486 system emulates x86_64
+code.
+
+This code:
+
+```
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+
+pthread_t thread1, thread2;
+
+// Signal handler for SIGALRM
+void alarm_handler(int sig) {
+    // Do nothing, just wake up the other thread
+}
+
+// Thread 1 function
+void* thread1_func(void* arg) {
+    // Set up the signal handler for SIGALRM
+    signal(SIGALRM, alarm_handler);
+
+    // Wait for 1 second so thread 2 can reach pause()
+    sleep(1);
+
+    // Send SIGALRM signal to thread 2
+    pthread_kill(thread2, SIGALRM);
+
+    return NULL;
+}
+
+// Thread 2 function
+void* thread2_func(void* arg) {
+    // Wait for the SIGALRM signal
+    pause();
+
+    printf("Thread 2 woke up!\n");
+
+    return NULL;
+}
+
+int main() {
+    // Create thread 1
+    if (pthread_create(&thread1, NULL, thread1_func, NULL) != 0) {
+        fprintf(stderr, "Failed to create thread 1\n");
+        return 1;
+    }
+
+    // Create thread 2
+    if (pthread_create(&thread2, NULL, thread2_func, NULL) != 0) {
+        fprintf(stderr, "Failed to create thread 2\n");
+        return 1;
+    }
+
+    // Wait for both threads to finish
+    pthread_join(thread1, NULL);
+    pthread_join(thread2, NULL);
+
+    return 0;
+}
+```
+
+It fails with this -strace log (there are also unsupported syscalls 334 and
+435, but they do not seem to affect the test much):
+
+```
+...
+736 rt_sigaction(SIGALRM,0x000000001123ec20,0x000000001123ecc0) = 0
+736 clock_nanosleep(CLOCK_REALTIME,0,{tv_sec = 1,tv_nsec = 0},{tv_sec = 1,tv_nsec = 0})
+736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x0000000010800b38,8) = 0
+736 Unknown syscall 435
+736 clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|
+ ...
+736 rt_sigprocmask(SIG_SETMASK,0x0000000010800b38,NULL,8)
+736 set_robust_list(0x11a419a0,0) = -1 errno=38 (Function not implemented)
+736 rt_sigprocmask(SIG_SETMASK,0x0000000011a41fb0,NULL,8) = 0
+ = 0
+736 pause(0,0,2,277186368,0,295966400)
+736 futex(0x000000001123f990,FUTEX_CLOCK_REALTIME|FUTEX_WAIT_BITSET,738,NULL,NULL,0)
+ = 0
+736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x000000001123ee88,8) = 0
+736 getpid() = 736
+736 tgkill(736,739,SIGALRM) = 0
+ = -1 errno=4 (Interrupted system call)
+--- SIGALRM {si_signo=SIGALRM, si_code=SI_TKILL, si_pid=736, si_uid=0} ---
+0x48874a != 0x3c69e10
+736 rt_sigprocmask(SIG_SETMASK,0x000000001123ee88,NULL,8) = 0
+**
+ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
+(cpu == current_cpu)
+Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+failed: (cpu == current_cpu)
+0x48874a != 0x3c69e10
+**
+ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
+(cpu == current_cpu)
+Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+failed: (cpu == current_cpu)
+#
+```
+
+The code fails with or without -singlestep; the command line:
+
+/usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin
+
+The QEMU 8.1.1 source was modified with the patch "[PATCH] qemu/timer: Don't
+use RDTSC on i486" [1], with a few added ioctls (not relevant), and
+cpu_exec_longjmp_cleanup() was changed to print the current pointers of
+cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
+
+config.log (built as a part of buildroot, basically the minimal possible
+configuration for running x86_64 on a 486):
+
+# Configured with:
+'/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/build/qemu-8.1.1/configure'
+'--prefix=/usr'
+'--cross-prefix=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/i486-buildroot-linux-gnu-'
+'--audio-drv-list='
+'--python=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/python3'
+'--ninja=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/ninja'
+'--disable-alsa' '--disable-bpf' '--disable-brlapi' '--disable-bsd-user'
+'--disable-cap-ng' '--disable-capstone' '--disable-containers'
+'--disable-coreaudio' '--disable-curl' '--disable-curses'
+'--disable-dbus-display' '--disable-docs' '--disable-dsound' '--disable-hvf'
+'--disable-jack' '--disable-libiscsi' '--disable-linux-aio'
+'--disable-linux-io-uring' '--disable-malloc-trim' '--disable-membarrier'
+'--disable-mpath' '--disable-netmap' '--disable-opengl' '--disable-oss'
+'--disable-pa' '--disable-rbd' '--disable-sanitizers' '--disable-selinux'
+'--disable-sparse' '--disable-strip' '--disable-vde' '--disable-vhost-crypto'
+'--disable-vhost-user-blk-server' '--disable-virtfs' '--disable-whpx'
+'--disable-xen' '--disable-attr' '--disable-kvm' '--disable-vhost-net'
+'--disable-download' '--disable-hexagon-idef-parser' '--disable-system'
+'--enable-linux-user' '--target-list=x86_64-linux-user' '--disable-vhost-user'
+'--disable-slirp' '--disable-sdl' '--disable-fdt' '--enable-trace-backends=nop'
+'--disable-tools' '--disable-guest-agent' '--disable-fuse'
+'--disable-fuse-lseek' '--disable-seccomp' '--disable-libssh'
+'--disable-libusb' '--disable-vnc' '--disable-nettle' '--disable-numa'
+'--disable-pipewire' '--disable-spice' '--disable-usb-redir'
+'--disable-install-blobs'
+
+Emulating the same x86_64 code with QEMU 6.2.0 installed on another native
+x86_64 machine works fine.
+
+[1]
+https://lists.nongnu.org/archive/html/qemu-devel/2023-11/msg05387.html
+Best regards,
+Petr
+
+On Sat, 25 Nov 2023 at 13:09, Petr Cvek <petrcvekcz@gmail.com> wrote:
+> It seems there is a bug in SIGALRM handling when a 486 system emulates
+> x86_64 code.
+
+486 host is pretty well out of support currently. Can you reproduce
+this on a less ancient host CPU type?
+
+> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+> failed: (cpu == current_cpu)
+> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> assertion failed: (cpu == current_cpu)
+> 0x48874a != 0x3c69e10
+> **
+> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+> failed: (cpu == current_cpu)
+> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> assertion failed: (cpu == current_cpu)
+
+What compiler version do you build QEMU with? That
+assert is there because we have seen some buggy compilers
+in the past which don't correctly preserve the variable
+value as the setjmp/longjmp spec requires them to.
+
+thanks
+-- PMM
+
+On 27. 11. 23 at 10:37, Peter Maydell wrote:
+> On Sat, 25 Nov 2023 at 13:09, Petr Cvek <petrcvekcz@gmail.com> wrote:
+>> It seems there is a bug in SIGALRM handling when a 486 system emulates
+>> x86_64 code.
+>
+> 486 host is pretty well out of support currently. Can you reproduce
+> this on a less ancient host CPU type?
+
+It seems it only fails when the code is compiled for i486.
+QEMU built with the same compiler with -march=i586 and above runs on the same
+physical hardware without a problem. All -march= variants were executed on a
+Ryzen 3600.
+
+>> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+>> failed: (cpu == current_cpu)
+>> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+>> assertion failed: (cpu == current_cpu)
+>> 0x48874a != 0x3c69e10
+>> **
+>> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+>> failed: (cpu == current_cpu)
+>> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+>> assertion failed: (cpu == current_cpu)
+>
+> What compiler version do you build QEMU with? That
+> assert is there because we have seen some buggy compilers
+> in the past which don't correctly preserve the variable
+> value as the setjmp/longjmp spec requires them to.
+
+The i486 and i586+ code variants were compiled with GCC 13.2.0 (more exactly,
+the slackware64 current multilib distribution).
+
+The i486 binary which runs on the real 486 is also built with GCC 13.2.0 and
+installed as a part of the buildroot crosscompiler (about a two-week-old git
+snapshot).
+
+> thanks
+> -- PMM
+
+best regards,
+Petr
+
+On 11/25/23 07:08, Petr Cvek wrote:
+> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+> failed: (cpu == current_cpu)
+> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> assertion failed: (cpu == current_cpu)
+> #
+>
+> The code fails with or without -singlestep; the command line:
+>
+> /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep
+> /opt/x86_64/alarm.bin
+>
+> The QEMU 8.1.1 source was modified with the patch "[PATCH] qemu/timer:
+> Don't use RDTSC on i486" [1], with a few added ioctls (not relevant), and
+> cpu_exec_longjmp_cleanup() was changed to print the current pointers of
+> cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
+
+If you try this again with 8.2-rc2, you should not see an assertion failure.
+You should see instead
+
+QEMU internal SIGILL {code=ILLOPC, addr=0x12345678}
+
+which I think more accurately summarizes the situation of attempting RDTSC on
+hardware that does not support it.
+
+r~
+
+On 29. 11. 23 at 15:25, Richard Henderson wrote:
+> On 11/25/23 07:08, Petr Cvek wrote:
+>> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+>> failed: (cpu == current_cpu)
+>> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+>> assertion failed: (cpu == current_cpu)
+>> #
+>>
+>> The code fails with or without -singlestep; the command line:
+>>
+>> /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep
+>> /opt/x86_64/alarm.bin
+>>
+>> The QEMU 8.1.1 source was modified with the patch "[PATCH] qemu/timer:
+>> Don't use RDTSC on i486" [1], with a few added ioctls (not relevant), and
+>> cpu_exec_longjmp_cleanup() was changed to print the current pointers of
+>> cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
+>
+> If you try this again with 8.2-rc2, you should not see an assertion failure.
+> You should see instead
+>
+> QEMU internal SIGILL {code=ILLOPC, addr=0x12345678}
+>
+> which I think more accurately summarizes the situation of attempting RDTSC
+> on hardware that does not support it.
+
+Compiling vanilla QEMU v8.2.0-rc2 with -march=i486 using GCC 13.2.0 and
+running the resulting binary on the Ryzen still leads to:
+
+**
+ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion failed:
+(cpu == current_cpu)
+Bail out! ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup:
+assertion failed: (cpu == current_cpu)
+Aborted
+
+> r~
+
+Petr
+
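+For background on the compiler issue Peter Maydell describes above: per the C
+standard (C11 7.13.2.1), a local variable whose value is not changed between
+the setjmp() call and the corresponding longjmp() must still hold that value
+after the jump. The (cpu == current_cpu) assertion exists to catch compilers
+that clobber such a variable anyway. The following standalone sketch is
+illustrative only, not QEMU code; the helper name signal_exit_path and the
+pointer values are invented to mirror the failure mode reported in this
+thread:
+
+```
+/* Sketch of the setjmp/longjmp preservation rule the assertion guards. */
+#include <assert.h>
+#include <setjmp.h>
+#include <stdio.h>
+
+static jmp_buf env;
+
+static void signal_exit_path(void)
+{
+    /* In QEMU the equivalent longjmp happens on the host signal
+     * handler path, unwinding back into the execution loop. */
+    longjmp(env, 1);
+}
+
+int main(void)
+{
+    void *cpu = (void *)0x2000;         /* set once, never modified below */
+    void *current_cpu = (void *)0x2000;
+
+    if (setjmp(env) == 0) {
+        signal_exit_path();             /* simulate the signal-driven exit */
+    }
+
+    /* 'cpu' was not modified after setjmp(), so the standard requires it
+     * to still hold its value here. A compiler that cached it in a
+     * register and failed to restore it on longjmp() breaks this -- the
+     * condition the QEMU assertion reports. */
+    assert(cpu == current_cpu);
+    printf("cpu preserved across longjmp: %p\n", cpu);
+    return 0;
+}
+```
+
+With a conforming compiler the assert always passes; the failures reported
+above are consistent with -march=i486 code generation violating this
+preservation rule.
+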