Diffstat (limited to 'results/classifier/zero-shot/003/KVM')
 -rw-r--r--  results/classifier/zero-shot/003/KVM/04472277   579
 -rw-r--r--  results/classifier/zero-shot/003/KVM/26430026   168
 -rw-r--r--  results/classifier/zero-shot/003/KVM/33802194  4942
 -rw-r--r--  results/classifier/zero-shot/003/KVM/42613410   152
 -rw-r--r--  results/classifier/zero-shot/003/KVM/43643137   541
 -rw-r--r--  results/classifier/zero-shot/003/KVM/55961334    42
 -rw-r--r--  results/classifier/zero-shot/003/KVM/71456293  1489
 -rw-r--r--  results/classifier/zero-shot/003/KVM/80615920   351
8 files changed, 8264 insertions, 0 deletions
diff --git a/results/classifier/zero-shot/003/KVM/04472277 b/results/classifier/zero-shot/003/KVM/04472277 new file mode 100644 index 000000000..a8ac3b596 --- /dev/null +++ b/results/classifier/zero-shot/003/KVM/04472277 @@ -0,0 +1,579 @@ +KVM: 0.890 +network: 0.847 +other: 0.846 +instruction: 0.845 +boot: 0.831 +mistranslation: 0.817 +semantic: 0.815 + +[BUG][KVM_SET_USER_MEMORY_REGION] KVM_SET_USER_MEMORY_REGION failed + +Hi all, +I start a VM in openstack, and openstack use libvirt to start qemu VM, but now log show this ERROR. +Is there any one know this? +The ERROR log from /var/log/libvirt/qemu/instance-0000000e.log +``` +2023-03-14T10:09:17.674114Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument +kvm_set_phys_mem: error registering slot: Invalid argument +2023-03-14 10:09:18.198+0000: shutting down, reason=crashed +``` +The xml file +``` +root@c1c2:~# cat /etc/libvirt/qemu/instance-0000000e.xml +<!-- +WARNING: THIS IS AN AUTO-GENERATED FILE. CHANGES TO IT ARE LIKELY TO BE +OVERWRITTEN AND LOST. Changes to this xml configuration should be made using: + virsh edit instance-0000000e +or other application using the libvirt API. +--> +<domain type='kvm'> + <name>instance-0000000e</name> + <uuid>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</uuid> + <metadata> +  <nova:instance xmlns:nova=" +http://openstack.org/xmlns/libvirt/nova/1.1 +"> +   <nova:package version="25.1.0"/> +   <nova:name>provider-instance</nova:name> +   <nova:creationTime>2023-03-14 10:09:13</nova:creationTime> +   <nova:flavor name="cirros-os-dpu-test-1"> +    <nova:memory>64</nova:memory> +    <nova:disk>1</nova:disk> +    <nova:swap>0</nova:swap> +    <nova:ephemeral>0</nova:ephemeral> +    <nova:vcpus>1</nova:vcpus> +   </nova:flavor> +   <nova:owner> +    <nova:user uuid="ff627ad39ed94479b9c5033bc462cf78">admin</nova:user> +    <nova:project uuid="512866f9994f4ad8916d8539a7cdeec9">admin</nova:project> +   </nova:owner> +   <nova:root type="image" uuid="9e58cb69-316a-4093-9f23-c1d1bd8edffe"/> +   <nova:ports> +    <nova:port uuid="77c1dc00-af39-4463-bea0-12808f4bc340"> +     <nova:ip type="fixed" address="172.1.1.43" ipVersion="4"/> +    </nova:port> +   </nova:ports> +  </nova:instance> + </metadata> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='static'>1</vcpu> + <sysinfo type='smbios'> +  <system> +   <entry name='manufacturer'>OpenStack Foundation</entry> +   <entry name='product'>OpenStack Nova</entry> +   <entry name='version'>25.1.0</entry> +   <entry name='serial'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry> +   <entry name='uuid'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry> +   <entry name='family'>Virtual Machine</entry> +  </system> + </sysinfo> + <os> +  <type arch='x86_64' machine='pc-i440fx-6.2'>hvm</type> +  <boot dev='hd'/> +  <smbios mode='sysinfo'/> + </os> + <features> +  <acpi/> +  <apic/> +  <vmcoreinfo state='on'/> + </features> + <cpu mode='host-model' check='partial'> +  <topology sockets='1' dies='1' cores='1' threads='1'/> + </cpu> + <clock offset='utc'> +  <timer name='pit' tickpolicy='delay'/> +  <timer name='rtc' tickpolicy='catchup'/> +  <timer name='hpet' present='no'/> + </clock> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> +  <emulator>/usr/bin/qemu-system-x86_64</emulator> +  <disk type='file' device='disk'> +   <driver name='qemu' type='qcow2' cache='none'/> +   
<source file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk'/> +   <target dev='vda' bus='virtio'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> +  </disk> +  <controller type='usb' index='0' model='piix3-uhci'> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> +  </controller> +  <controller type='pci' index='0' model='pci-root'/> +  <interface type='hostdev' managed='yes'> +   <mac address='fa:16:3e:aa:d9:23'/> +   <source> +    <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x5'/> +   </source> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> +  </interface> +  <serial type='pty'> +   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/> +   <target type='isa-serial' port='0'> +    <model name='isa-serial'/> +   </target> +  </serial> +  <console type='pty'> +   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/> +   <target type='serial' port='0'/> +  </console> +  <input type='tablet' bus='usb'> +   <address type='usb' bus='0' port='1'/> +  </input> +  <input type='mouse' bus='ps2'/> +  <input type='keyboard' bus='ps2'/> +  <graphics type='vnc' port='-1' autoport='yes' listen='0.0.0.0'> +   <listen type='address' address='0.0.0.0'/> +  </graphics> +  <audio id='1' type='none'/> +  <video> +   <model type='virtio' heads='1' primary='yes'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> +  </video> +  <hostdev mode='subsystem' type='pci' managed='yes'> +   <source> +    <address domain='0x0000' bus='0x01' slot='0x00' function='0x6'/> +   </source> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/> +  </hostdev> +  <memballoon model='virtio'> +   <stats period='10'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> +  </memballoon> +  <rng model='virtio'> +   <backend model='random'>/dev/urandom</backend> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/> +  </rng> + </devices> +</domain> +``` +---- +Simon Jones + +This is happened in ubuntu22.04. +QEMU is install by apt like this: +apt install -y qemu qemu-kvm qemu-system +and QEMU version is 6.2.0 +---- +Simon Jones +Simon Jones < +batmanustc@gmail.com +> äº2023å¹´3æ21æ¥å¨äº 08:40åéï¼ +Hi all, +I start a VM in openstack, and openstack use libvirt to start qemu VM, but now log show this ERROR. +Is there any one know this? +The ERROR log from /var/log/libvirt/qemu/instance-0000000e.log +``` +2023-03-14T10:09:17.674114Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument +kvm_set_phys_mem: error registering slot: Invalid argument +2023-03-14 10:09:18.198+0000: shutting down, reason=crashed +``` +The xml file +``` +root@c1c2:~# cat /etc/libvirt/qemu/instance-0000000e.xml +<!-- +WARNING: THIS IS AN AUTO-GENERATED FILE. CHANGES TO IT ARE LIKELY TO BE +OVERWRITTEN AND LOST. Changes to this xml configuration should be made using: + virsh edit instance-0000000e +or other application using the libvirt API. 
+--> +<domain type='kvm'> + <name>instance-0000000e</name> + <uuid>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</uuid> + <metadata> +  <nova:instance xmlns:nova=" +http://openstack.org/xmlns/libvirt/nova/1.1 +"> +   <nova:package version="25.1.0"/> +   <nova:name>provider-instance</nova:name> +   <nova:creationTime>2023-03-14 10:09:13</nova:creationTime> +   <nova:flavor name="cirros-os-dpu-test-1"> +    <nova:memory>64</nova:memory> +    <nova:disk>1</nova:disk> +    <nova:swap>0</nova:swap> +    <nova:ephemeral>0</nova:ephemeral> +    <nova:vcpus>1</nova:vcpus> +   </nova:flavor> +   <nova:owner> +    <nova:user uuid="ff627ad39ed94479b9c5033bc462cf78">admin</nova:user> +    <nova:project uuid="512866f9994f4ad8916d8539a7cdeec9">admin</nova:project> +   </nova:owner> +   <nova:root type="image" uuid="9e58cb69-316a-4093-9f23-c1d1bd8edffe"/> +   <nova:ports> +    <nova:port uuid="77c1dc00-af39-4463-bea0-12808f4bc340"> +     <nova:ip type="fixed" address="172.1.1.43" ipVersion="4"/> +    </nova:port> +   </nova:ports> +  </nova:instance> + </metadata> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='static'>1</vcpu> + <sysinfo type='smbios'> +  <system> +   <entry name='manufacturer'>OpenStack Foundation</entry> +   <entry name='product'>OpenStack Nova</entry> +   <entry name='version'>25.1.0</entry> +   <entry name='serial'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry> +   <entry name='uuid'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry> +   <entry name='family'>Virtual Machine</entry> +  </system> + </sysinfo> + <os> +  <type arch='x86_64' machine='pc-i440fx-6.2'>hvm</type> +  <boot dev='hd'/> +  <smbios mode='sysinfo'/> + </os> + <features> +  <acpi/> +  <apic/> +  <vmcoreinfo state='on'/> + </features> + <cpu mode='host-model' check='partial'> +  <topology sockets='1' dies='1' cores='1' threads='1'/> + </cpu> + <clock offset='utc'> +  <timer name='pit' tickpolicy='delay'/> +  <timer name='rtc' tickpolicy='catchup'/> +  <timer name='hpet' present='no'/> + </clock> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> +  <emulator>/usr/bin/qemu-system-x86_64</emulator> +  <disk type='file' device='disk'> +   <driver name='qemu' type='qcow2' cache='none'/> +   <source file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk'/> +   <target dev='vda' bus='virtio'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> +  </disk> +  <controller type='usb' index='0' model='piix3-uhci'> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> +  </controller> +  <controller type='pci' index='0' model='pci-root'/> +  <interface type='hostdev' managed='yes'> +   <mac address='fa:16:3e:aa:d9:23'/> +   <source> +    <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x5'/> +   </source> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> +  </interface> +  <serial type='pty'> +   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/> +   <target type='isa-serial' port='0'> +    <model name='isa-serial'/> +   </target> +  </serial> +  <console type='pty'> +   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/> +   <target type='serial' port='0'/> +  </console> +  <input type='tablet' bus='usb'> +   <address type='usb' bus='0' port='1'/> +  </input> +  <input type='mouse' bus='ps2'/> +  <input 
type='keyboard' bus='ps2'/> +  <graphics type='vnc' port='-1' autoport='yes' listen='0.0.0.0'> +   <listen type='address' address='0.0.0.0'/> +  </graphics> +  <audio id='1' type='none'/> +  <video> +   <model type='virtio' heads='1' primary='yes'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> +  </video> +  <hostdev mode='subsystem' type='pci' managed='yes'> +   <source> +    <address domain='0x0000' bus='0x01' slot='0x00' function='0x6'/> +   </source> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/> +  </hostdev> +  <memballoon model='virtio'> +   <stats period='10'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> +  </memballoon> +  <rng model='virtio'> +   <backend model='random'>/dev/urandom</backend> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/> +  </rng> + </devices> +</domain> +``` +---- +Simon Jones + +This is full ERROR log +2023-03-23 08:00:52.362+0000: starting up libvirt version: 8.0.0, package: 1ubuntu7.4 (Christian Ehrhardt < +christian.ehrhardt@canonical.com +> Tue, 22 Nov 2022 15:59:28 +0100), qemu version: 6.2.0Debian 1:6.2+dfsg-2ubuntu6.6, kernel: 5.19.0-35-generic, hostname: c1c2 +LC_ALL=C \ +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin \ +HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=instance-0000000e,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-4-instance-0000000e/master-key.aes"}' \ +-machine pc-i440fx-6.2,usb=off,dump-guest-core=off,memory-backend=pc.ram \ +-accel kvm \ +-cpu Cooperlake,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,sha-ni=on,umip=on,waitpkg=on,gfni=on,vaes=on,vpclmulqdq=on,rdpid=on,movdiri=on,movdir64b=on,fsrm=on,md-clear=on,avx-vnni=on,xsaves=on,ibpb=on,ibrs=on,amd-stibp=on,amd-ssbd=on,hle=off,rtm=off,avx512f=off,avx512dq=off,avx512cd=off,avx512bw=off,avx512vl=off,avx512vnni=off,avx512-bf16=off,taa-no=off \ +-m 64 \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":67108864}' \ +-overcommit mem-lock=off \ +-smp 1,sockets=1,dies=1,cores=1,threads=1 \ +-uuid ff91d2dc-69a1-43ef-abde-c9e4e9a0305b \ +-smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=25.1.0,serial=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,uuid=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,family=Virtual Machine' \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=33,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc,driftfix=slew \ +-global kvm-pit.lost_tick_policy=delay \ +-no-hpet \ +-no-shutdown \ +-boot strict=on \ +-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \ +-blockdev '{"driver":"file","filename":"/var/lib/nova/instances/_base/8b58db82a488248e7c5e769599954adaa47a5314","node-name":"libvirt-2-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \ +-blockdev '{"node-name":"libvirt-2-format","read-only":true,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-2-storage"}' \ +-blockdev 
'{"driver":"file","filename":"/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \ +-blockdev '{"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":"libvirt-2-format"}' \ +-device virtio-blk-pci,bus=pci.0,addr=0x3,drive=libvirt-1-format,id=virtio-disk0,bootindex=1,write-cache=on \ +-add-fd set=1,fd=34 \ +-chardev pty,id=charserial0,logfile=/dev/fdset/1,logappend=on \ +-device isa-serial,chardev=charserial0,id=serial0 \ +-device usb-tablet,id=input0,bus=usb.0,port=1 \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-vnc +0.0.0.0:0 +,audiodev=audio1 \ +-device virtio-vga,id=video0,max_outputs=1,bus=pci.0,addr=0x2 \ +-device vfio-pci,host=0000:01:00.5,id=hostdev0,bus=pci.0,addr=0x4 \ +-device vfio-pci,host=0000:01:00.6,id=hostdev1,bus=pci.0,addr=0x5 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 \ +-object '{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}' \ +-device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.0,addr=0x7 \ +-device vmcoreinfo \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on +char device redirected to /dev/pts/3 (label charserial0) +2023-03-23T08:00:53.728550Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument +kvm_set_phys_mem: error registering slot: Invalid argument +2023-03-23 08:00:54.201+0000: shutting down, reason=crashed +2023-03-23 08:54:43.468+0000: starting up libvirt version: 8.0.0, package: 1ubuntu7.4 (Christian Ehrhardt < +christian.ehrhardt@canonical.com +> Tue, 22 Nov 2022 15:59:28 +0100), qemu version: 6.2.0Debian 1:6.2+dfsg-2ubuntu6.6, kernel: 5.19.0-35-generic, hostname: c1c2 +LC_ALL=C \ +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin \ +HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=instance-0000000e,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-5-instance-0000000e/master-key.aes"}' \ +-machine pc-i440fx-6.2,usb=off,dump-guest-core=off,memory-backend=pc.ram \ +-accel kvm \ +-cpu Cooperlake,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,sha-ni=on,umip=on,waitpkg=on,gfni=on,vaes=on,vpclmulqdq=on,rdpid=on,movdiri=on,movdir64b=on,fsrm=on,md-clear=on,avx-vnni=on,xsaves=on,ibpb=on,ibrs=on,amd-stibp=on,amd-ssbd=on,hle=off,rtm=off,avx512f=off,avx512dq=off,avx512cd=off,avx512bw=off,avx512vl=off,avx512vnni=off,avx512-bf16=off,taa-no=off \ +-m 64 \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":67108864}' \ +-overcommit mem-lock=off \ +-smp 1,sockets=1,dies=1,cores=1,threads=1 \ +-uuid ff91d2dc-69a1-43ef-abde-c9e4e9a0305b \ +-smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=25.1.0,serial=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,uuid=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,family=Virtual Machine' \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=33,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ 
+-rtc base=utc,driftfix=slew \ +-global kvm-pit.lost_tick_policy=delay \ +-no-hpet \ +-no-shutdown \ +-boot strict=on \ +-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \ +-blockdev '{"driver":"file","filename":"/var/lib/nova/instances/_base/8b58db82a488248e7c5e769599954adaa47a5314","node-name":"libvirt-2-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \ +-blockdev '{"node-name":"libvirt-2-format","read-only":true,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-2-storage"}' \ +-blockdev '{"driver":"file","filename":"/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \ +-blockdev '{"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":"libvirt-2-format"}' \ +-device virtio-blk-pci,bus=pci.0,addr=0x3,drive=libvirt-1-format,id=virtio-disk0,bootindex=1,write-cache=on \ +-add-fd set=1,fd=34 \ +-chardev pty,id=charserial0,logfile=/dev/fdset/1,logappend=on \ +-device isa-serial,chardev=charserial0,id=serial0 \ +-device usb-tablet,id=input0,bus=usb.0,port=1 \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-vnc +0.0.0.0:0 +,audiodev=audio1 \ +-device virtio-vga,id=video0,max_outputs=1,bus=pci.0,addr=0x2 \ +-device vfio-pci,host=0000:01:00.5,id=hostdev0,bus=pci.0,addr=0x4 \ +-device vfio-pci,host=0000:01:00.6,id=hostdev1,bus=pci.0,addr=0x5 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 \ +-object '{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}' \ +-device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.0,addr=0x7 \ +-device vmcoreinfo \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on +char device redirected to /dev/pts/3 (label charserial0) +2023-03-23T08:54:44.755039Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument +kvm_set_phys_mem: error registering slot: Invalid argument +2023-03-23 08:54:45.230+0000: shutting down, reason=crashed +---- +Simon Jones +Simon Jones < +batmanustc@gmail.com +> äº2023å¹´3æ23æ¥å¨å 05:49åéï¼ +This is happened in ubuntu22.04. +QEMU is install by apt like this: +apt install -y qemu qemu-kvm qemu-system +and QEMU version is 6.2.0 +---- +Simon Jones +Simon Jones < +batmanustc@gmail.com +> äº2023å¹´3æ21æ¥å¨äº 08:40åéï¼ +Hi all, +I start a VM in openstack, and openstack use libvirt to start qemu VM, but now log show this ERROR. +Is there any one know this? +The ERROR log from /var/log/libvirt/qemu/instance-0000000e.log +``` +2023-03-14T10:09:17.674114Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument +kvm_set_phys_mem: error registering slot: Invalid argument +2023-03-14 10:09:18.198+0000: shutting down, reason=crashed +``` +The xml file +``` +root@c1c2:~# cat /etc/libvirt/qemu/instance-0000000e.xml +<!-- +WARNING: THIS IS AN AUTO-GENERATED FILE. CHANGES TO IT ARE LIKELY TO BE +OVERWRITTEN AND LOST. Changes to this xml configuration should be made using: + virsh edit instance-0000000e +or other application using the libvirt API. 
+--> +<domain type='kvm'> + <name>instance-0000000e</name> + <uuid>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</uuid> + <metadata> +  <nova:instance xmlns:nova=" +http://openstack.org/xmlns/libvirt/nova/1.1 +"> +   <nova:package version="25.1.0"/> +   <nova:name>provider-instance</nova:name> +   <nova:creationTime>2023-03-14 10:09:13</nova:creationTime> +   <nova:flavor name="cirros-os-dpu-test-1"> +    <nova:memory>64</nova:memory> +    <nova:disk>1</nova:disk> +    <nova:swap>0</nova:swap> +    <nova:ephemeral>0</nova:ephemeral> +    <nova:vcpus>1</nova:vcpus> +   </nova:flavor> +   <nova:owner> +    <nova:user uuid="ff627ad39ed94479b9c5033bc462cf78">admin</nova:user> +    <nova:project uuid="512866f9994f4ad8916d8539a7cdeec9">admin</nova:project> +   </nova:owner> +   <nova:root type="image" uuid="9e58cb69-316a-4093-9f23-c1d1bd8edffe"/> +   <nova:ports> +    <nova:port uuid="77c1dc00-af39-4463-bea0-12808f4bc340"> +     <nova:ip type="fixed" address="172.1.1.43" ipVersion="4"/> +    </nova:port> +   </nova:ports> +  </nova:instance> + </metadata> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='static'>1</vcpu> + <sysinfo type='smbios'> +  <system> +   <entry name='manufacturer'>OpenStack Foundation</entry> +   <entry name='product'>OpenStack Nova</entry> +   <entry name='version'>25.1.0</entry> +   <entry name='serial'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry> +   <entry name='uuid'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry> +   <entry name='family'>Virtual Machine</entry> +  </system> + </sysinfo> + <os> +  <type arch='x86_64' machine='pc-i440fx-6.2'>hvm</type> +  <boot dev='hd'/> +  <smbios mode='sysinfo'/> + </os> + <features> +  <acpi/> +  <apic/> +  <vmcoreinfo state='on'/> + </features> + <cpu mode='host-model' check='partial'> +  <topology sockets='1' dies='1' cores='1' threads='1'/> + </cpu> + <clock offset='utc'> +  <timer name='pit' tickpolicy='delay'/> +  <timer name='rtc' tickpolicy='catchup'/> +  <timer name='hpet' present='no'/> + </clock> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> +  <emulator>/usr/bin/qemu-system-x86_64</emulator> +  <disk type='file' device='disk'> +   <driver name='qemu' type='qcow2' cache='none'/> +   <source file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk'/> +   <target dev='vda' bus='virtio'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> +  </disk> +  <controller type='usb' index='0' model='piix3-uhci'> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> +  </controller> +  <controller type='pci' index='0' model='pci-root'/> +  <interface type='hostdev' managed='yes'> +   <mac address='fa:16:3e:aa:d9:23'/> +   <source> +    <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x5'/> +   </source> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> +  </interface> +  <serial type='pty'> +   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/> +   <target type='isa-serial' port='0'> +    <model name='isa-serial'/> +   </target> +  </serial> +  <console type='pty'> +   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/> +   <target type='serial' port='0'/> +  </console> +  <input type='tablet' bus='usb'> +   <address type='usb' bus='0' port='1'/> +  </input> +  <input type='mouse' bus='ps2'/> +  <input 
type='keyboard' bus='ps2'/> +  <graphics type='vnc' port='-1' autoport='yes' listen='0.0.0.0'> +   <listen type='address' address='0.0.0.0'/> +  </graphics> +  <audio id='1' type='none'/> +  <video> +   <model type='virtio' heads='1' primary='yes'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> +  </video> +  <hostdev mode='subsystem' type='pci' managed='yes'> +   <source> +    <address domain='0x0000' bus='0x01' slot='0x00' function='0x6'/> +   </source> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/> +  </hostdev> +  <memballoon model='virtio'> +   <stats period='10'/> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> +  </memballoon> +  <rng model='virtio'> +   <backend model='random'>/dev/urandom</backend> +   <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/> +  </rng> + </devices> +</domain> +``` +---- +Simon Jones + diff --git a/results/classifier/zero-shot/003/KVM/26430026 b/results/classifier/zero-shot/003/KVM/26430026 new file mode 100644 index 000000000..5ecea6ffe --- /dev/null +++ b/results/classifier/zero-shot/003/KVM/26430026 @@ -0,0 +1,168 @@ +KVM: 0.919 +mistranslation: 0.915 +semantic: 0.904 +instruction: 0.888 +boot: 0.841 +other: 0.813 +network: 0.758 + +[BUG] cxl,i386: e820 mappings may not be correct for cxl + +Context included below from prior discussion + - `cxl create-region` would fail on inability to allocate memory + - traced this down to the memory region being marked RESERVED + - E820 map marks the CXL fixed memory window as RESERVED + + +Re: x86 errors, I found that region worked with this patch. (I also +added the SRAT patches the Davidlohr posted, but I do not think they are +relevant). + +I don't think this is correct, and setting this to E820_RAM causes the +system to fail to boot at all, but with this change `cxl create-region` +succeeds, which suggests our e820 mappings in the i386 machine are +incorrect. + +Anyone who can help or have an idea as to what e820 should actually be +doing with this region, or if this is correct and something else is +failing, please help! + + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..a5e688a742 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1077,7 +1077,7 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +- e820_add_entry(fw->base, fw->size, E820_RESERVED); ++ e820_add_entry(fw->base, fw->size, E820_NVS); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + + +On Mon, Oct 10, 2022 at 05:32:42PM +0100, Jonathan Cameron wrote: +> +> +> > but i'm not sure of what to do with this info. We have some proof +> +> > that real hardware works with this no problem, and the only difference +> +> > is that the EFI/bios/firmware is setting the memory regions as `usable` +> +> > or `soft reserved`, which would imply the EDK2 is the blocker here +> +> > regardless of the OS driver status. +> +> > +> +> > But I'd seen elsewhere you had gotten some of this working, and I'm +> +> > failing to get anything working at the moment. If you have any input i +> +> > would greatly appreciate the help. 
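
As an aside on the first report above: a minimal sketch, in plain C, of the ioctl that is failing there. The slot, start and size mirror the values in the log; everything else is illustrative, not QEMU's actual code. KVM rejects a slot with EINVAL when the range is unaligned, overlaps an existing slot, or lies beyond what it can map, and 0xfffffffffe000000 sits far above a typical 40-bit guest physical address space.
```c
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

/* Sketch only: register one guest memory slot with KVM, mirroring the
 * failing call from the log (slot=4, start=0xfffffffffe000000,
 * size=0x2000). vm_fd would come from KVM_CREATE_VM. */
int register_slot(int vm_fd)
{
    void *host = mmap(NULL, 0x2000, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    struct kvm_userspace_memory_region region = {
        .slot            = 4,
        .flags           = 0,
        .guest_phys_addr = 0xfffffffffe000000ULL,
        .memory_size     = 0x2000,
        .userspace_addr  = (__u64)(unsigned long)host,
    };
    /* Returns -1 with errno == EINVAL for an unaligned, overlapping,
     * or out-of-range guest physical range. */
    return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
```
And for the e820 question in the second report, the standard e820 entry types and, roughly, how Linux treats each; a reference sketch only, since which type a CXL fixed memory window should carry is exactly what the thread is trying to settle.
```c
/* ACPI address range types as used in the e820 map (reference sketch).
 * The patch above flips the CXL fixed memory window from type 2 to 4. */
#define E820_RAM       1  /* usable RAM, given to the page allocator  */
#define E820_RESERVED  2  /* firmware-reserved, never used as RAM     */
#define E820_ACPI      3  /* ACPI tables, reclaimable after parsing   */
#define E820_NVS       4  /* ACPI non-volatile storage, preserved     */
#define E820_UNUSABLE  5  /* ranges with known errors                 */
```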
+> +> > +> +> > QEMU config: +> +> > +> +> > /opt/qemu-cxl2/bin/qemu-system-x86_64 \ +> +> > -drive +> +> > file=/var/lib/libvirt/images/cxl.qcow2,format=qcow2,index=0,media=d\ +> +> > -m 2G,slots=4,maxmem=4G \ +> +> > -smp 4 \ +> +> > -machine type=q35,accel=kvm,cxl=on \ +> +> > -enable-kvm \ +> +> > -nographic \ +> +> > -device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 \ +> +> > -device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 \ +> +> > -object memory-backend-file,id=cxl-mem0,mem-path=/tmp/cxl-mem0,size=256M \ +> +> > -object memory-backend-file,id=lsa0,mem-path=/tmp/cxl-lsa0,size=256M \ +> +> > -device cxl-type3,bus=rp0,pmem=true,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 +> +> > \ +> +> > -M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=256M +> +> > +> +> > I'd seen on the lists that you had seen issues with single-rp setups, +> +> > but no combination of configuration I've tried (including all the ones +> +> > in the docs and tests) lead to a successful region creation with +> +> > `cxl create-region` +> +> +> +> Hmm. Let me have a play. I've not run x86 tests for a while so +> +> perhaps something is missing there. +> +> +> +> I'm carrying a patch to override check_last_peer() in +> +> cxl_port_setup_targets() as that is wrong for some combinations, +> +> but that doesn't look like it's related to what you are seeing. +> +> +I'm not sure if it's relevant, but turned out I'd forgotten I'm carrying 3 +> +patches that aren't upstream (and one is a horrible hack). +> +> +Hack: +https://lore.kernel.org/linux-cxl/20220819094655.000005ed@huawei.com/ +> +Shouldn't affect a simple case like this... +> +> +https://lore.kernel.org/linux-cxl/20220819093133.00006c22@huawei.com/T/#t +> +(Dan's version) +> +> +https://lore.kernel.org/linux-cxl/20220815154044.24733-1-Jonathan.Cameron@huawei.com/T/#t +> +> +For writes to work you will currently need two rps (nothing on the second is +> +fine) +> +as we still haven't resolved if the kernel should support an HDM decoder on +> +a host bridge with one port. I think it should (Spec allows it), others +> +unconvinced. +> +> +Note I haven't shifted over to x86 yet so may still be something different +> +from +> +arm64. +> +> +Jonathan +> +> + diff --git a/results/classifier/zero-shot/003/KVM/33802194 b/results/classifier/zero-shot/003/KVM/33802194 new file mode 100644 index 000000000..5a0c1895c --- /dev/null +++ b/results/classifier/zero-shot/003/KVM/33802194 @@ -0,0 +1,4942 @@ +KVM: 0.725 +instruction: 0.693 +mistranslation: 0.687 +semantic: 0.656 +network: 0.644 +other: 0.637 +boot: 0.631 + +[BUG] cxl can not create region + +Hi list + +I want to test cxl functions in arm64, and found some problems I can't +figure out. + +My test environment: + +1. build latest bios from +https://github.com/tianocore/edk2.git +master +branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) +2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git +master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm +support patch: +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +3. build Linux kernel from +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git +preview +branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) +4. 
build latest ndctl tools from +https://github.com/pmem/ndctl +create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) + +And my qemu test commands: +sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ + -cpu max -smp 8 -nographic -no-reboot \ + -kernel $KERNEL -bios $BIOS_BIN \ + -drive if=none,file=$ROOTFS,format=qcow2,id=hd \ + -device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 +nokaslr dyndbg="module cxl* +p"' \ + -object memory-backend-ram,size=4G,id=mem0 \ + -numa node,nodeid=0,cpus=0-7,memdev=mem0 \ + -net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ + -object +memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M +\ + -object +memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M +\ + -object +memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M +\ + -object +memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M +\ + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ + -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ + -device cxl-upstream,bus=root_port0,id=us0 \ + -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ + -device +cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ + -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ + -device +cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ + -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ + -device +cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ + -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ + -device +cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ + -M +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k + +And I have got two problems. +1. When I want to create x1 region with command: "cxl create-region -d +decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer +reference. 
Crash log: + +[ 534.697324] cxl_region region0: config state: 0 +[ 534.697346] cxl_region region0: probe: -6 +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +for mem0:decoder3.0 @ 0 +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +0000:0e:00.0 for mem0:decoder3.0 @ 0 +[ 534.699405] Unable to handle kernel NULL pointer dereference at +virtual address 0000000000000000 +[ 534.701474] Mem abort info: +[ 534.701994] ESR = 0x0000000086000004 +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +[ 534.703616] SET = 0, FnV = 0 +[ 534.704174] EA = 0, S1PTW = 0 +[ 534.704803] FSC = 0x04: level 0 translation fault +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +[ 534.710301] Modules linked in: +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 534.719190] pc : 0x0 +[ 534.719928] lr : commit_store+0x118/0x2cc +[ 534.721007] sp : ffff80000aec3c30 +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: ffff0000c0c06b30 +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: ffff0000c0a29400 +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: ffff0000c0c06800 +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: 0000000000000000 +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffd41fe838 +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff0000c0906e80 +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff80000aec3bf0 +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c155a000 +[ 534.738878] Call trace: +[ 534.739368] 0x0 +[ 534.739713] dev_attr_store+0x1c/0x30 +[ 534.740186] sysfs_kf_write+0x48/0x58 +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +[ 534.741872] new_sync_write+0xdc/0x158 +[ 534.742706] vfs_write+0x1ac/0x2a8 +[ 534.743440] ksys_write+0x68/0xf0 +[ 534.744328] __arm64_sys_write+0x1c/0x28 +[ 534.745180] invoke_syscall+0x44/0xf0 +[ 534.745989] el0_svc_common+0x4c/0xfc +[ 534.746661] do_el0_svc+0x60/0xa8 +[ 534.747378] el0_svc+0x2c/0x78 +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +[ 534.748919] el0t_64_sync+0x18c/0x190 +[ 534.749629] Code: bad PC value +[ 534.750169] ---[ end trace 0000000000000000 ]--- + +2. When I want to create x4 region with command: "cxl create-region -d +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: + +cxl region: create_region: region0: failed to set target3 to mem3 +cxl region: cmd_create_region: created 0 regions + +And kernel log as below: +[ 60.536663] cxl_region region0: config state: 0 +[ 60.536675] cxl_region region0: probe: -6 +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +for mem0:decoder3.0 @ 0 +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +0000:0e:00.0 for mem0:decoder3.0 @ 0 +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at 1 + +I have tried to write sysfs node manually, got same errors. + +Hope I can get some helps here. + +Bob + +On Fri, 5 Aug 2022 10:20:23 +0800 +Bobo WL <lmw.bobo@gmail.com> wrote: + +> +Hi list +> +> +I want to test cxl functions in arm64, and found some problems I can't +> +figure out. +Hi Bob, + +Glad to see people testing this code. + +> +> +My test environment: +> +> +1. build latest bios from +https://github.com/tianocore/edk2.git +master +> +branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) +> +2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git +> +master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm +> +support patch: +> +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +> +3. build Linux kernel from +> +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git +preview +> +branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) +> +4. 
build latest ndctl tools from +https://github.com/pmem/ndctl +> +create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) +> +> +And my qemu test commands: +> +sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ +> +-cpu max -smp 8 -nographic -no-reboot \ +> +-kernel $KERNEL -bios $BIOS_BIN \ +> +-drive if=none,file=$ROOTFS,format=qcow2,id=hd \ +> +-device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 +> +nokaslr dyndbg="module cxl* +p"' \ +> +-object memory-backend-ram,size=4G,id=mem0 \ +> +-numa node,nodeid=0,cpus=0-7,memdev=mem0 \ +> +-net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ +> +-object +> +memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M +> +\ +> +-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ +> +-device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ +Probably not related to your problem, but there is a disconnect in QEMU / +kernel assumptionsaround the presence of an HDM decoder when a HB only +has a single root port. Spec allows it to be provided or not as an +implementation choice. +Kernel assumes it isn't provide. Qemu assumes it is. + +The temporary solution is to throw in a second root port on the HB and not +connect anything to it. Longer term I may special case this so that the +particular +decoder defaults to pass through settings in QEMU if there is only one root +port. + +> +-device cxl-upstream,bus=root_port0,id=us0 \ +> +-device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ +> +-device +> +cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ +> +-device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ +> +-device +> +cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ +> +-device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ +> +-device +> +cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ +> +-device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ +> +-device +> +cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ +> +-M +> +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k +> +> +And I have got two problems. +> +1. When I want to create x1 region with command: "cxl create-region -d +> +decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer +> +reference. Crash log: +> +> +[ 534.697324] cxl_region region0: config state: 0 +> +[ 534.697346] cxl_region region0: probe: -6 +Seems odd this is up here. But maybe fine. 
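
To make the suggested workaround concrete: a sketch of the topology tweak, reusing the root-port line from the reporter's own command. root_port1 is a hypothetical placeholder with nothing attached; it exists only so the host bridge has two root ports and the kernel's and QEMU's HDM decoder assumptions line up.
```
-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
-device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
-device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
```
The upstream switch port and the type-3 devices still hang off root_port0 exactly as before.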
+ +> +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +> +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 534.699405] Unable to handle kernel NULL pointer dereference at +> +virtual address 0000000000000000 +> +[ 534.701474] Mem abort info: +> +[ 534.701994] ESR = 0x0000000086000004 +> +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +> +[ 534.703616] SET = 0, FnV = 0 +> +[ 534.704174] EA = 0, S1PTW = 0 +> +[ 534.704803] FSC = 0x04: level 0 translation fault +> +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +> +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +> +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +> +[ 534.710301] Modules linked in: +> +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +> +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +> +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 534.719190] pc : 0x0 +> +[ 534.719928] lr : commit_store+0x118/0x2cc +> +[ 534.721007] sp : ffff80000aec3c30 +> +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: +> +ffff0000c0c06b30 +> +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: +> +ffff0000c0a29400 +> +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: +> +ffff0000c0c06800 +> +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: +> +0000000000000000 +> +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: +> +0000ffffd41fe838 +> +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: +> +0000000000000000 +> +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : +> +0000000000000000 +> +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : +> +ffff0000c0906e80 +> +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : +> +ffff80000aec3bf0 +> +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : +> +ffff0000c155a000 +> +[ 534.738878] Call trace: +> +[ 534.739368] 0x0 +> +[ 534.739713] dev_attr_store+0x1c/0x30 +> +[ 534.740186] sysfs_kf_write+0x48/0x58 +> +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +> +[ 534.741872] new_sync_write+0xdc/0x158 +> +[ 534.742706] vfs_write+0x1ac/0x2a8 +> +[ 534.743440] ksys_write+0x68/0xf0 +> +[ 534.744328] __arm64_sys_write+0x1c/0x28 +> +[ 534.745180] invoke_syscall+0x44/0xf0 +> +[ 534.745989] el0_svc_common+0x4c/0xfc +> +[ 534.746661] do_el0_svc+0x60/0xa8 +> +[ 534.747378] el0_svc+0x2c/0x78 +> +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +> +[ 534.748919] el0t_64_sync+0x18c/0x190 +> +[ 534.749629] Code: bad PC value +> +[ 534.750169] ---[ end trace 0000000000000000 ]--- +> +> +2. When I want to create x4 region with command: "cxl create-region -d +> +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: +> +> +cxl region: create_region: region0: failed to set target3 to mem3 +> +cxl region: cmd_create_region: created 0 regions +> +> +And kernel log as below: +> +[ 60.536663] cxl_region region0: config state: 0 +> +[ 60.536675] cxl_region region0: probe: -6 +> +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +> +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +> +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +> +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +> +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +> +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +> +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +> +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +> +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +> +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +This looks like off by 1 that should be fixed in the below mentioned +cxl/pending branch. That ig should be 256. Note the fix was +for a test case with a fat HB and no switch, but certainly looks +like this is the same issue. + +> +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at +> +1 +> +> +I have tried to write sysfs node manually, got same errors. +When stepping through by hand, which sysfs write triggers the crash above? + +Not sure it's related, but I've just sent out a fix to the +target register handling in QEMU. +20220808122051.14822-1-Jonathan.Cameron@huawei.com +/T/#m47ff985412ce44559e6b04d677c302f8cd371330">https://lore.kernel.org/linux-cxl/ +20220808122051.14822-1-Jonathan.Cameron@huawei.com +/T/#m47ff985412ce44559e6b04d677c302f8cd371330 +I did have one instance last week of triggering what looked to be a race +condition but +the stack trace doesn't looks related to what you've hit. + +It will probably be a few days before I have time to take a look at replicating +what you have seen. + +If you have time, try using the kernel.org cxl/pending branch as there are +a few additional fixes on there since you sent this email. Optimistic to hope +this is covered by one of those, but at least it will mean we are trying to +replicate +on same branch. + +Jonathan + + +> +> +Hope I can get some helps here. 
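
Since the question above is which sysfs write triggers the crash: when stepping through by hand, the flow for a x1 pmem region looks roughly like the following. Attribute names follow the kernel's sysfs-bus-cxl ABI of this era; the values are illustrative, and the final `commit` write is the one that lands in commit_store(), where the oops above has its link register.
```
region=$(cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region)
echo $region > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
cd /sys/bus/cxl/devices/$region
echo 256 > interleave_granularity
echo 1 > interleave_ways
echo 0x10000000 > size          # 256M, illustrative
echo decoder3.0 > target0       # endpoint decoder for mem0
echo 1 > commit                 # reaches commit_store() in the trace
```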
+> +> +Bob + +Hi Jonathan + +Thanks for your reply! + +On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +<Jonathan.Cameron@huawei.com> wrote: +> +> +Probably not related to your problem, but there is a disconnect in QEMU / +> +kernel assumptionsaround the presence of an HDM decoder when a HB only +> +has a single root port. Spec allows it to be provided or not as an +> +implementation choice. +> +Kernel assumes it isn't provide. Qemu assumes it is. +> +> +The temporary solution is to throw in a second root port on the HB and not +> +connect anything to it. Longer term I may special case this so that the +> +particular +> +decoder defaults to pass through settings in QEMU if there is only one root +> +port. +> +You are right! After adding an extra HB in qemu, I can create a x1 +region successfully. +But have some errors in Nvdimm: + +[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node 0 +[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node 0 +[ 74.927470] nd_region region0: nmem0: is disabled, failing probe + +And x4 region still failed with same errors, using latest cxl/preview +branch don't work. +I have picked "Two CXL emulation fixes" patches in qemu, still not working. + +Bob + +On Tue, 9 Aug 2022 21:07:06 +0800 +Bobo WL <lmw.bobo@gmail.com> wrote: + +> +Hi Jonathan +> +> +Thanks for your reply! +> +> +On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +<Jonathan.Cameron@huawei.com> wrote: +> +> +> +> Probably not related to your problem, but there is a disconnect in QEMU / +> +> kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> has a single root port. Spec allows it to be provided or not as an +> +> implementation choice. +> +> Kernel assumes it isn't provide. Qemu assumes it is. +> +> +> +> The temporary solution is to throw in a second root port on the HB and not +> +> connect anything to it. Longer term I may special case this so that the +> +> particular +> +> decoder defaults to pass through settings in QEMU if there is only one root +> +> port. +> +> +> +> +You are right! After adding an extra HB in qemu, I can create a x1 +> +region successfully. +> +But have some errors in Nvdimm: +> +> +[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node > 0 +> +[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node > 0 +> +[ 74.927470] nd_region region0: nmem0: is disabled, failing probe +Ah. I've seen this one, but not chased it down yet. Was on my todo list to +chase +down. Once I reach this state I can verify the HDM Decode is correct which is +what +I've been using to test (Which wasn't true until earlier this week). +I'm currently testing via devmem, more for historical reasons than because it +makes +that much sense anymore. + +> +> +And x4 region still failed with same errors, using latest cxl/preview +> +branch don't work. +> +I have picked "Two CXL emulation fixes" patches in qemu, still not working. +> +> +Bob + +On Tue, 9 Aug 2022 17:08:25 +0100 +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: + +> +On Tue, 9 Aug 2022 21:07:06 +0800 +> +Bobo WL <lmw.bobo@gmail.com> wrote: +> +> +> Hi Jonathan +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> <Jonathan.Cameron@huawei.com> wrote: +> +> > +> +> > Probably not related to your problem, but there is a disconnect in QEMU / +> +> > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > has a single root port. 
Spec allows it to be provided or not as an +> +> > implementation choice. +> +> > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > +> +> > The temporary solution is to throw in a second root port on the HB and not +> +> > connect anything to it. Longer term I may special case this so that the +> +> > particular +> +> > decoder defaults to pass through settings in QEMU if there is only one +> +> > root port. +> +> > +> +> +> +> You are right! After adding an extra HB in qemu, I can create a x1 +> +> region successfully. +> +> But have some errors in Nvdimm: +> +> +> +> [ 74.925838] Unknown online node for memory at 0x10000000000, assuming +> +> node 0 +> +> [ 74.925846] Unknown target node for memory at 0x10000000000, assuming +> +> node 0 +> +> [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> +Ah. I've seen this one, but not chased it down yet. Was on my todo list to +> +chase +> +down. Once I reach this state I can verify the HDM Decode is correct which is +> +what +> +I've been using to test (Which wasn't true until earlier this week). +> +I'm currently testing via devmem, more for historical reasons than because it +> +makes +> +that much sense anymore. +*embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +I'd forgotten that was still on the todo list. I don't think it will +be particularly hard to do and will take a look in next few days. + +Very very indirectly this error is causing a driver probe fail that means that +we hit a code path that has a rather odd looking check on NDD_LABELING. +Should not have gotten near that path though - hence the problem is actually +when we call cxl_pmem_get_config_data() and it returns an error because +we haven't fully connected up the command in QEMU. + +Jonathan + + +> +> +> +> +> And x4 region still failed with same errors, using latest cxl/preview +> +> branch don't work. +> +> I have picked "Two CXL emulation fixes" patches in qemu, still not working. +> +> +> +> Bob + +On Thu, 11 Aug 2022 18:08:57 +0100 +Jonathan Cameron via <qemu-devel@nongnu.org> wrote: + +> +On Tue, 9 Aug 2022 17:08:25 +0100 +> +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> +> On Tue, 9 Aug 2022 21:07:06 +0800 +> +> Bobo WL <lmw.bobo@gmail.com> wrote: +> +> +> +> > Hi Jonathan +> +> > +> +> > Thanks for your reply! +> +> > +> +> > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > <Jonathan.Cameron@huawei.com> wrote: +> +> > > +> +> > > Probably not related to your problem, but there is a disconnect in QEMU +> +> > > / +> +> > > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > > has a single root port. Spec allows it to be provided or not as an +> +> > > implementation choice. +> +> > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > +> +> > > The temporary solution is to throw in a second root port on the HB and +> +> > > not +> +> > > connect anything to it. Longer term I may special case this so that +> +> > > the particular +> +> > > decoder defaults to pass through settings in QEMU if there is only one +> +> > > root port. +> +> > > +> +> > +> +> > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > region successfully. 
+On Thu, 11 Aug 2022 18:08:57 +0100
+Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
+
+> [...]
+> Very very indirectly this error is causing a driver probe fail that means
+> that we hit a code path that has a rather odd looking check on NDD_LABELING.
+> Should not have gotten near that path though - hence the problem is actually
+> when we call cxl_pmem_get_config_data() and it returns an error because
+> we haven't fully connected up the command in QEMU.
+
+So at least one bug in QEMU. We were not supporting variable length payloads
+on mailbox inputs (but were on outputs). That hasn't mattered until we get to
+LSA writes. We just need to relax the condition on the supplied length.
+
+diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
+index c352a935c4..fdda9529fe 100644
+--- a/hw/cxl/cxl-mailbox-utils.c
++++ b/hw/cxl/cxl-mailbox-utils.c
+@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
+         cxl_cmd = &cxl_cmd_set[set][cmd];
+         h = cxl_cmd->handler;
+         if (h) {
+-            if (len == cxl_cmd->in) {
++            if (len == cxl_cmd->in || !cxl_cmd->in) {
+                 cxl_cmd->payload = cxl_dstate->mbox_reg_state +
+                     A_CXL_DEV_CMD_PAYLOAD;
+                 ret = (*h)(cxl_cmd, cxl_dstate, &len);
+
+This lets the nvdimm/region probe fine, but I'm getting some issues with
+namespace capacity so I'll look at what is causing that next.
+Unfortunately I'm not that familiar with the driver/nvdimm side of things
+so it'll take a while to figure out what kicks off what!
+
+Jonathan
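+For reference when reading these diffs: each cxl_cmd_set table entry pairs an
+opcode handler with the input payload length it expects. A rough sketch of
+the struct behind the table, reconstructed from the diffs in this thread
+(treat field order and types as approximate, not verbatim QEMU source):
+
+struct cxl_cmd {
+    const char *name;
+    opcode_handler handler;
+    ssize_t in;         /* expected input payload length */
+    uint16_t effect;    /* reported in the Command Effects Log */
+    uint8_t *payload;   /* points into the mailbox register file */
+};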
+Jonathan Cameron wrote:
+> [...]
+> This lets the nvdimm/region probe fine, but I'm getting some issues with
+> namespace capacity so I'll look at what is causing that next.
+> Unfortunately I'm not that familiar with the driver/nvdimm side of things
+> so it'll take a while to figure out what kicks off what!
+
+The whirlwind tour is that 'struct nd_region' instances that represent a
+persistent memory address range are composed of one or more mappings of
+'struct nvdimm' objects. The nvdimm object is driven by the dimm driver
+in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking
+the dimm (if locked) and interrogating the label area to look for
+namespace labels.
+
+The label command calls are routed to the '->ndctl()' callback that was
+registered when the CXL nvdimm_bus_descriptor was created. That callback
+handles both 'bus' scope calls, currently none for CXL, and per-nvdimm
+calls.
+cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands
+to CXL commands.
+
+The 'struct nvdimm' objects that the CXL side registers have the
+NDD_LABELING flag set, which means that namespaces need to be explicitly
+created / provisioned from region capacity. Otherwise, if
+drivers/nvdimm/dimm.c does not find a namespace-label-index block then
+the region reverts to label-less mode and a default namespace equal to
+the size of the region is instantiated.
+
+If you are seeing small mismatches in namespace capacity then it may
+just be the fact that by default 'ndctl create-namespace' results in an
+'fsdax' mode namespace, which just means that it is a block device where
+1.5% of the capacity is reserved for 'struct page' metadata. You should
+be able to see namespace capacity == region capacity by doing "ndctl
+create-namespace -m raw", which disables DAX operation.
+
+Hope that helps.
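+For readers unfamiliar with that callback plumbing, the routing Dan describes
+boils down to a dispatch function of roughly the following shape. This is a
+minimal sketch loosely modeled on drivers/cxl/pmem.c of that era; names and
+signatures are illustrative rather than verbatim kernel code.
+
+static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
+                        struct nvdimm *nvdimm, unsigned int cmd,
+                        void *buf, unsigned int buf_len, int *cmd_rc)
+{
+        /*
+         * A NULL nvdimm indicates a bus-scope call; CXL currently has
+         * none, so only the per-nvdimm label commands are handled.
+         */
+        if (!nvdimm)
+                return -ENOTTY;
+
+        /* Translate ND_CMD_GET_CONFIG_SIZE/DATA etc. into CXL mailbox ops. */
+        return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
+}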
+On Fri, 12 Aug 2022 09:03:02 -0700
+Dan Williams <dan.j.williams@intel.com> wrote:
+
+> [...]
+> If you are seeing small mismatches in namespace capacity then it may
+> just be the fact that by default 'ndctl create-namespace' results in an
+> 'fsdax' mode namespace [...]. You should
+> be able to see namespace capacity == region capacity by doing "ndctl
+> create-namespace -m raw", which disables DAX operation.
+
+Currently ndctl create-namespace crashes qemu ;)
+Which isn't ideal!
+
+> Hope that helps.
+
+Got me looking at the right code. Thanks!
+Jonathan
+
+On Fri, 12 Aug 2022 17:15:09 +0100
+Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+
+> [...]
+> Currently ndctl create-namespace crashes qemu ;)
+> Which isn't ideal!
+Found a cause for this one. Mailbox payload may be as small as 256 bytes.
+We have code in the kernel sanity checking that the output payload fits in
+the mailbox, but nothing on the input payload. Symptom is that we write just
+off the end, whatever size the payload is. Note doing this shouldn't crash
+qemu - so I need to fix a range check somewhere.
+
+I think this is because cxl_pmem_get_config_size() returns the mailbox
+payload size as being the available LSA size, forgetting to remove the
+size of the headers on the set_lsa side of things.
+https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110
+
+I've hacked the max_payload to be -8.
+
+Now we still don't succeed in creating the namespace, but the bonus is it
+doesn't crash any more.
+
+Jonathan
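+The "-8" above corresponds to the Set LSA input header: a 4-byte offset plus
+4 reserved bytes that travel in the mailbox payload ahead of the label data.
+A sketch of the shape such a fix could take in cxl_pmem_get_config_size() -
+the struct layout and naming here are assumptions drawn from the thread, not
+the actual committed patch:
+
+/* Set LSA input header: 8 bytes of the mailbox payload are not label data. */
+struct cxl_mbox_set_lsa {
+        __le32 offset;
+        __le32 reserved;
+        u8 data[];
+} __packed;
+
+static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
+                                    struct nd_cmd_get_config_size *cmd,
+                                    unsigned int buf_len)
+{
+        if (sizeof(*cmd) > buf_len)
+                return -EINVAL;
+
+        *cmd = (struct nd_cmd_get_config_size) {
+                .config_size = cxlds->lsa_size,
+                /* Leave room for the header in every Set LSA transfer. */
+                .max_xfer = cxlds->payload_size -
+                            sizeof(struct cxl_mbox_set_lsa),
+        };
+        return 0;
+}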
+On Mon, 15 Aug 2022 15:18:09 +0100
+Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
+
+> [...]
+> Found a cause for this one. Mailbox payload may be as small as 256 bytes.
+> We have code in the kernel sanity checking that the output payload fits in
+> the mailbox, but nothing on the input payload. Symptom is that we write just
+> off the end, whatever size the payload is. Note doing this shouldn't crash
+> qemu - so I need to fix a range check somewhere.
+In the interests of defensive / correct handling from QEMU I took a
+look into why it was crashing. Turns out that providing a NULL write callback
+for the memory device region (that the above overlarge write was spilling
+into) isn't a safe thing to do. Needs a stub. Oops.
+
+On the plus side, we might never have noticed this was going wrong without
+the crash *silver lining in every cloud*.
+
+Fix to follow...
+
+Jonathan
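+The crash mechanism: QEMU invokes MemoryRegionOps callbacks without checking
+for NULL, so a guest store (or, here, an overlarge mailbox copy) into a region
+registered with no .write handler dereferences a NULL pointer. A minimal
+sketch of the kind of stub needed - the mdev_* names are hypothetical, not
+the actual QEMU patch:
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "exec/memory.h"
+
+static uint64_t mdev_read(void *opaque, hwaddr offset, unsigned size)
+{
+    /* ... real register/memory read ... */
+    return 0;
+}
+
+/* Stub: writes are ignored, but the access must not fault. */
+static void mdev_write(void *opaque, hwaddr offset, uint64_t value,
+                       unsigned size)
+{
+    qemu_log_mask(LOG_UNIMP, "%s: ignored write @0x%" HWADDR_PRIx "\n",
+                  __func__, offset);
+}
+
+static const MemoryRegionOps mdev_ops = {
+    .read = mdev_read,
+    .write = mdev_write,    /* previously NULL -> segfault on guest write */
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};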
+On Mon, 15 Aug 2022 at 15:55, Jonathan Cameron via <qemu-arm@nongnu.org> wrote:
+> In the interests of defensive / correct handling from QEMU I took a
+> look into why it was crashing. Turns out that providing a NULL write
+> callback for the memory device region (that the above overlarge write was
+> spilling into) isn't a safe thing to do. Needs a stub. Oops.
+
+Yeah. We've talked before about adding an assert so that that kind of
+"missing function" bug is caught at device creation rather than only
+if the guest tries to access the device, but we never quite got around
+to it...
+
+-- PMM
+
+On Fri, 12 Aug 2022 16:44:03 +0100
+Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+
+> [...]
+> So at least one bug in QEMU. We were not supporting variable length
+> payloads on mailbox inputs (but were on outputs). That hasn't mattered
+> until we get to LSA writes. We just need to relax the condition on the
+> supplied length.
+>
+> -            if (len == cxl_cmd->in) {
+> +            if (len == cxl_cmd->in || !cxl_cmd->in) {
+
+Fix is wrong, as we use ~0 as the placeholder for variable payload, not 0.
+
+With that fixed we hit new fun paths - after some errors we get the worrying
+trace below. Not totally sure, but it looks like a failure on an error
+cleanup. I'll chase down the error source, but even then this is probably
+triggerable by a hardware problem or similar. Some bonus prints in here from
+me chasing error paths, but it's otherwise just cxl/next plus the fix I
+posted earlier today.
+
+[   69.919877] nd_bus ndbus0: START: nd_region.probe(region0)
+[   69.920108] nd_region_probe
+[   69.920623] ------------[ cut here ]------------
+[   69.920675] refcount_t: addition on 0; use-after-free.
+[   69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x144
+[   69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi cxl_core
+[   69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399
+[   69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
+[   69.931482] Workqueue: events_unbound async_run_entry_fn
+[   69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[   69.934023] pc : refcount_warn_saturate+0xa0/0x144
+[   69.935161] lr : refcount_warn_saturate+0xa0/0x144
+[   69.936541] sp : ffff80000890b960
+[   69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: 0000000000000000
+[   69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: 0000000000000000
+[   69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: ffff0000c5254800
+[   69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: ffffffffffffffff
+[   69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: 0000000000000000
+[   69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: 657466612d657375
+[   69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : ffffa54a8f63d288
+[   69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : 00000000fffff31e
+[   69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : ffff5ab66e5ef000
+root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [   69.954752] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740
+[   69.957098] Call trace:
+[   69.957959]  refcount_warn_saturate+0xa0/0x144
+[   69.958773]  get_ndd+0x5c/0x80
+[   69.959294]  nd_region_register_namespaces+0xe4/0xe90
+[   69.960253]  nd_region_probe+0x100/0x290
+[   69.960796]  nvdimm_bus_probe+0xf4/0x1c0
+[   69.962087]  really_probe+0x19c/0x3f0
+[   69.962620]  __driver_probe_device+0x11c/0x190
+[   69.963258]  driver_probe_device+0x44/0xf4
+[   69.963773]  __device_attach_driver+0xa4/0x140
+[   69.964471]  bus_for_each_drv+0x84/0xe0
+[   69.965068]  __device_attach+0xb0/0x1f0
+[   69.966101]  device_initial_probe+0x20/0x30
+[   69.967142]  bus_probe_device+0xa4/0xb0
+[   69.968104]  device_add+0x3e8/0x910
+[   69.969111]  nd_async_device_register+0x24/0x74
+[   69.969928]  async_run_entry_fn+0x40/0x150
+[   69.970725]  process_one_work+0x1dc/0x450
+[   69.971796]  worker_thread+0x154/0x450
+[   69.972700]  kthread+0x118/0x120
+[   69.974141]  ret_from_fork+0x10/0x20
+[   69.975141] ---[ end trace 0000000000000000 ]---
+[   70.117887] Into nd_namespace_pmem_set_resource()
+
+Jonathan
+On Mon, 15 Aug 2022 18:04:44 +0100
+Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+
+> [...]
+> Fix is wrong, as we use ~0 as the placeholder for variable payload, not 0.
+Cause of the error is a failure in GET_LSA.
+Reason: the payload length is wrong in QEMU, but that was hidden previously by
+my wrong fix here. Probably still a good idea to inject an error in GET_LSA
+and chase down the refcount issue.
+
+diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
+index fdda9529fe..e8565fbd6e 100644
+--- a/hw/cxl/cxl-mailbox-utils.c
++++ b/hw/cxl/cxl-mailbox-utils.c
+@@ -489,7 +489,7 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
+         cmd_identify_memory_device, 0, 0 },
+     [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
+         cmd_ccls_get_partition_info, 0, 0 },
+-    [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 },
++    [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
+     [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
+         ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
+     [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
+@@ -510,12 +510,13 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
+         cxl_cmd = &cxl_cmd_set[set][cmd];
+         h = cxl_cmd->handler;
+         if (h) {
+-            if (len == cxl_cmd->in || !cxl_cmd->in) {
++            if (len == cxl_cmd->in || cxl_cmd->in == ~0) {
+                 cxl_cmd->payload = cxl_dstate->mbox_reg_state +
+                     A_CXL_DEV_CMD_PAYLOAD;
+
+And woot, we get a namespace in the LSA :)
+
+I'll post QEMU fixes in the next day or two. The kernel side now seems more
+or less fine, albeit with a suspicious refcount underflow.
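+For context on the "8" in that GET_LSA entry: per the CXL 2.0 spec, Get LSA
+takes a fixed 8-byte input payload, an offset into the label storage area plus
+a length to read back. Roughly, as an illustrative declaration (the struct
+name is hypothetical, not from the QEMU source):
+
+/* Get LSA input payload: fixed 8 bytes, hence cxl_cmd->in == 8. */
+struct cxl_get_lsa_in {
+    uint32_t offset;    /* byte offset into the LSA */
+    uint32_t length;    /* number of bytes to return */
+} QEMU_PACKED;
+
+Set LSA, by contrast, carries variable-length label data after its header,
+which is why its table entry uses the ~0 "variable payload" sentinel.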
+Jonathan Cameron wrote:
+> [...]
+> With that fixed we hit new fun paths - after some errors we get the worrying
+> trace below. Not totally sure, but it looks like a failure on an error
+> cleanup. I'll chase down the error source, but even then this is probably
+> triggerable by a hardware problem or similar.
+One of the scenarios that I cannot rule out is nvdimm_probe() racing
+nd_region_probe(), but given all the work it takes to create a region I
+suspect all the nvdimm_probe() work to have completed...
+
+It is at least one potentially wrong hypothesis that needs to be chased
+down.
+On Mon, 15 Aug 2022 15:55:15 -0700
+Dan Williams <dan.j.williams@intel.com> wrote:
+
+> [...]
+> One of the scenarios that I cannot rule out is nvdimm_probe() racing
+> nd_region_probe(), but given all the work it takes to create a region I
+> suspect all the nvdimm_probe() work to have completed...
+>
+> It is at least one potentially wrong hypothesis that needs to be chased
+> down.
+Maybe there should be a special award for the non-intuitive
+ndctl create-namespace command (modifies an existing namespace and might
+create a different empty one...). I'm sure there is some interesting history
+behind that one :)
+
+Upshot is I just threw a filesystem on fsdax and wrote some text files on it
+to allow easy grepping. The right data ends up in the memory and a plausible
+namespace description is stored in the LSA.
+
+So to some degree at least it's 'working' on an 8-way direct connected
+set of emulated devices.
+
+One snag is that serial number support isn't yet upstream in QEMU.
+(I have had it in my tree for a while but not posted it yet because of
+ QEMU feature freeze.)
+https://gitlab.com/jic23/qemu/-/commit/144c783ea8a5fbe169f46ea1ba92940157f42733
+That's needed for meaningful cookie generation. Otherwise you can build the
+namespace once, but it won't work on the next probe as the cookie is 0 and
+you hit some error paths.
+
+Maybe sensible to add a sanity check and fail namespace creation if the
+cookie is 0? (Silly side question, but is there a theoretical risk of
+a serial number / other data combination leading to a fletcher64()
+checksum that happens to be 0 - that would give a very odd bug report!)
+
+So to make it work the following is needed:
+
+1) The kernel fix for the mailbox buffer overflow.
+2) QEMU fix for the size of the arguments for get_lsa.
+3) QEMU fix to allow variable size input arguments (for set_lsa).
+4) The serial number patch above, plus command lines to qemu to set
+   appropriate serial numbers.
+
+I'll send out the QEMU fixes shortly and post the serial number patch,
+though that almost certainly won't go in until the next QEMU development
+cycle starts in a few weeks.
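+On the fletcher64() aside: a zero checksum is indeed theoretically possible,
+since Fletcher-64 is just two 32-bit running sums. A generic sketch of the
+algorithm, not the kernel's exact drivers/nvdimm implementation:
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Generic Fletcher-64 over 32-bit words; a zero result is rare but legal. */
+static uint64_t fletcher64(const uint32_t *buf, size_t words)
+{
+    uint32_t lo = 0, hi = 0;
+    size_t i;
+
+    for (i = 0; i < words; i++) {
+        lo += buf[i];   /* running sum of the data words (mod 2^32) */
+        hi += lo;       /* running sum of the running sums (mod 2^32) */
+    }
+    return ((uint64_t)hi << 32) | lo;
+}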
+ +Next up, run through same tests on some other topologies. + +Jonathan + +> +> +> +> +> [ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) +> +> [ 69.920108] nd_region_probe +> +> [ 69.920623] ------------[ cut here ]------------ +> +> [ 69.920675] refcount_t: addition on 0; use-after-free. +> +> [ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 +> +> refcount_warn_saturate+0xa0/0x144 +> +> [ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port +> +> cxl_acpi cxl_core +> +> [ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ +> +> #399 +> +> [ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 +> +> 02/06/2015 +> +> [ 69.931482] Workqueue: events_unbound async_run_entry_fn +> +> [ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS +> +> BTYPE=--) +> +> [ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 +> +> [ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 +> +> [ 69.936541] sp : ffff80000890b960 +> +> [ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: +> +> 0000000000000000 +> +> [ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: +> +> 0000000000000000 +> +> [ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: +> +> ffff0000c5254800 +> +> [ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: +> +> ffffffffffffffff +> +> [ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: +> +> 0000000000000000 +> +> [ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: +> +> 657466612d657375 +> +> [ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : +> +> ffffa54a8f63d288 +> +> [ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : +> +> 00000000fffff31e +> +> [ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : +> +> ffff5ab66e5ef000 +> +> root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : +> +> 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 +> +> [ 69.957098] Call trace: +> +> [ 69.957959] refcount_warn_saturate+0xa0/0x144 +> +> [ 69.958773] get_ndd+0x5c/0x80 +> +> [ 69.959294] nd_region_register_namespaces+0xe4/0xe90 +> +> [ 69.960253] nd_region_probe+0x100/0x290 +> +> [ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 +> +> [ 69.962087] really_probe+0x19c/0x3f0 +> +> [ 69.962620] __driver_probe_device+0x11c/0x190 +> +> [ 69.963258] driver_probe_device+0x44/0xf4 +> +> [ 69.963773] __device_attach_driver+0xa4/0x140 +> +> [ 69.964471] bus_for_each_drv+0x84/0xe0 +> +> [ 69.965068] __device_attach+0xb0/0x1f0 +> +> [ 69.966101] device_initial_probe+0x20/0x30 +> +> [ 69.967142] bus_probe_device+0xa4/0xb0 +> +> [ 69.968104] device_add+0x3e8/0x910 +> +> [ 69.969111] nd_async_device_register+0x24/0x74 +> +> [ 69.969928] async_run_entry_fn+0x40/0x150 +> +> [ 69.970725] process_one_work+0x1dc/0x450 +> +> [ 69.971796] worker_thread+0x154/0x450 +> +> [ 69.972700] kthread+0x118/0x120 +> +> [ 69.974141] ret_from_fork+0x10/0x20 +> +> [ 69.975141] ---[ end trace 0000000000000000 ]--- +> +> [ 70.117887] Into nd_namespace_pmem_set_resource() + +Bobo WL wrote: +> +Hi list +> +> +I want to test cxl functions in arm64, and found some problems I can't +> +figure out. +> +> +My test environment: +> +> +1. build latest bios from +https://github.com/tianocore/edk2.git +master +> +branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) +> +2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git +> +master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). 
With cxl arm +> +support patch: +> +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +> +3. build Linux kernel from +> +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git +preview +> +branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) +> +4. build latest ndctl tools from +https://github.com/pmem/ndctl +> +create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) +> +> +And my qemu test commands: +> +sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ +> +-cpu max -smp 8 -nographic -no-reboot \ +> +-kernel $KERNEL -bios $BIOS_BIN \ +> +-drive if=none,file=$ROOTFS,format=qcow2,id=hd \ +> +-device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 +> +nokaslr dyndbg="module cxl* +p"' \ +> +-object memory-backend-ram,size=4G,id=mem0 \ +> +-numa node,nodeid=0,cpus=0-7,memdev=mem0 \ +> +-net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ +> +-object +> +memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M +> +\ +> +-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ +> +-device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ +> +-device cxl-upstream,bus=root_port0,id=us0 \ +> +-device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ +> +-device +> +cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ +> +-device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ +> +-device +> +cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ +> +-device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ +> +-device +> +cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ +> +-device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ +> +-device +> +cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ +> +-M +> +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k +> +> +And I have got two problems. +> +1. When I want to create x1 region with command: "cxl create-region -d +> +decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer +> +reference. 
Crash log: +> +> +[ 534.697324] cxl_region region0: config state: 0 +> +[ 534.697346] cxl_region region0: probe: -6 +> +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +> +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 534.699405] Unable to handle kernel NULL pointer dereference at +> +virtual address 0000000000000000 +> +[ 534.701474] Mem abort info: +> +[ 534.701994] ESR = 0x0000000086000004 +> +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +> +[ 534.703616] SET = 0, FnV = 0 +> +[ 534.704174] EA = 0, S1PTW = 0 +> +[ 534.704803] FSC = 0x04: level 0 translation fault +> +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +> +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +> +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +> +[ 534.710301] Modules linked in: +> +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +> +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +> +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 534.719190] pc : 0x0 +> +[ 534.719928] lr : commit_store+0x118/0x2cc +> +[ 534.721007] sp : ffff80000aec3c30 +> +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: +> +ffff0000c0c06b30 +> +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: +> +ffff0000c0a29400 +> +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: +> +ffff0000c0c06800 +> +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: +> +0000000000000000 +> +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: +> +0000ffffd41fe838 +> +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: +> +0000000000000000 +> +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : +> +0000000000000000 +> +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : +> +ffff0000c0906e80 +> +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : +> +ffff80000aec3bf0 +> +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : +> +ffff0000c155a000 +> +[ 534.738878] Call trace: +> +[ 534.739368] 0x0 +> +[ 534.739713] dev_attr_store+0x1c/0x30 +> +[ 534.740186] sysfs_kf_write+0x48/0x58 +> +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +> +[ 534.741872] new_sync_write+0xdc/0x158 +> +[ 534.742706] vfs_write+0x1ac/0x2a8 +> +[ 534.743440] ksys_write+0x68/0xf0 +> +[ 534.744328] __arm64_sys_write+0x1c/0x28 +> +[ 534.745180] invoke_syscall+0x44/0xf0 +> +[ 534.745989] el0_svc_common+0x4c/0xfc +> +[ 534.746661] do_el0_svc+0x60/0xa8 +> +[ 534.747378] el0_svc+0x2c/0x78 +> +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +> +[ 534.748919] el0t_64_sync+0x18c/0x190 +> +[ 534.749629] Code: bad PC value +> +[ 534.750169] ---[ end trace 0000000000000000 ]--- +What was the top kernel commit when you ran this test? 
What is the line +number of "commit_store+0x118"? + +> +2. When I want to create x4 region with command: "cxl create-region -d +> +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". I got below errors: +> +> +cxl region: create_region: region0: failed to set target3 to mem3 +> +cxl region: cmd_create_region: created 0 regions +> +> +And kernel log as below: +> +[ 60.536663] cxl_region region0: config state: 0 +> +[ 60.536675] cxl_region region0: probe: -6 +> +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +> +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +> +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +> +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +> +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +> +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +> +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +> +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +> +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +> +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +> +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at +> +1 +> +> +I have tried to write sysfs node manually, got same errors. +> +> +Hope I can get some helps here. +What is the output of: + + cxl list -MDTu -d decoder0.0 + +...? It might be the case that mem1 cannot be mapped by decoder0.0, or +at least not in the specified order, or that validation check is broken. + +Hi Dan, + +Thanks for your reply! + +On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> wrote: +> +> +What is the output of: +> +> +cxl list -MDTu -d decoder0.0 +> +> +...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +at least not in the specified order, or that validation check is broken. 
+Command "cxl list -MDTu -d decoder0.0" output: + +[ + { + "memdevs":[ + { + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:11:00.0" + }, + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:10:00.0" + }, + { + "memdev":"mem0", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:0f:00.0" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:12:00.0" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder0.0", + "resource":"0x10000000000", + "size":"4.00 GiB (4.29 GB)", + "pmem_capable":true, + "volatile_capable":true, + "accelmem_capable":true, + "nr_targets":1, + "targets":[ + { + "target":"ACPI0016:01", + "alias":"pci0000:0c", + "position":0, + "id":"0xc" + } + ] + } + ] + } +] + +Bobo WL wrote: +> +Hi Dan, +> +> +Thanks for your reply! +> +> +On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> wrote: +> +> +> +> What is the output of: +> +> +> +> cxl list -MDTu -d decoder0.0 +> +> +> +> ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> at least not in the specified order, or that validation check is broken. +> +> +Command "cxl list -MDTu -d decoder0.0" output: +Thanks for this, I think I know the problem, but will try some +experiments with cxl_test first. + +Did the commit_store() crash stop reproducing with latest cxl/preview +branch? + +On Tue, Aug 9, 2022 at 11:17 PM Dan Williams <dan.j.williams@intel.com> wrote: +> +> +Bobo WL wrote: +> +> Hi Dan, +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> +> +> wrote: +> +> > +> +> > What is the output of: +> +> > +> +> > cxl list -MDTu -d decoder0.0 +> +> > +> +> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > at least not in the specified order, or that validation check is broken. +> +> +> +> Command "cxl list -MDTu -d decoder0.0" output: +> +> +Thanks for this, I think I know the problem, but will try some +> +experiments with cxl_test first. +> +> +Did the commit_store() crash stop reproducing with latest cxl/preview +> +branch? +No, still hitting this bug if don't add extra HB device in qemu + +Dan Williams wrote: +> +Bobo WL wrote: +> +> Hi Dan, +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> +> +> wrote: +> +> > +> +> > What is the output of: +> +> > +> +> > cxl list -MDTu -d decoder0.0 +> +> > +> +> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > at least not in the specified order, or that validation check is broken. +> +> +> +> Command "cxl list -MDTu -d decoder0.0" output: +> +> +Thanks for this, I think I know the problem, but will try some +> +experiments with cxl_test first. +Hmm, so my cxl_test experiment unfortunately passed so I'm not +reproducing the failure mode. 
This is the result of creating x4 region +with devices directly attached to a single host-bridge: + +# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30)) +{ + "region":"region8", + "resource":"0xf1f0000000", + "size":"1024.00 MiB (1073.74 MB)", + "interleave_ways":4, + "interleave_granularity":256, + "decode_state":"commit", + "mappings":[ + { + "position":3, + "memdev":"mem11", + "decoder":"decoder21.0" + }, + { + "position":2, + "memdev":"mem9", + "decoder":"decoder19.0" + }, + { + "position":1, + "memdev":"mem10", + "decoder":"decoder20.0" + }, + { + "position":0, + "memdev":"mem12", + "decoder":"decoder22.0" + } + ] +} +cxl region: cmd_create_region: created 1 region + +> +Did the commit_store() crash stop reproducing with latest cxl/preview +> +branch? +I missed the answer to this question. + +All of these changes are now in Linus' tree perhaps give that a try and +post the debug log again? + +On Thu, 11 Aug 2022 17:46:55 -0700 +Dan Williams <dan.j.williams@intel.com> wrote: + +> +Dan Williams wrote: +> +> Bobo WL wrote: +> +> > Hi Dan, +> +> > +> +> > Thanks for your reply! +> +> > +> +> > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> +> +> > wrote: +> +> > > +> +> > > What is the output of: +> +> > > +> +> > > cxl list -MDTu -d decoder0.0 +> +> > > +> +> > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > > at least not in the specified order, or that validation check is +> +> > > broken. +> +> > +> +> > Command "cxl list -MDTu -d decoder0.0" output: +> +> +> +> Thanks for this, I think I know the problem, but will try some +> +> experiments with cxl_test first. +> +> +Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +reproducing the failure mode. This is the result of creating x4 region +> +with devices directly attached to a single host-bridge: +> +> +# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30)) +> +{ +> +"region":"region8", +> +"resource":"0xf1f0000000", +> +"size":"1024.00 MiB (1073.74 MB)", +> +"interleave_ways":4, +> +"interleave_granularity":256, +> +"decode_state":"commit", +> +"mappings":[ +> +{ +> +"position":3, +> +"memdev":"mem11", +> +"decoder":"decoder21.0" +> +}, +> +{ +> +"position":2, +> +"memdev":"mem9", +> +"decoder":"decoder19.0" +> +}, +> +{ +> +"position":1, +> +"memdev":"mem10", +> +"decoder":"decoder20.0" +> +}, +> +{ +> +"position":0, +> +"memdev":"mem12", +> +"decoder":"decoder22.0" +> +} +> +] +> +} +> +cxl region: cmd_create_region: created 1 region +> +> +> Did the commit_store() crash stop reproducing with latest cxl/preview +> +> branch? +> +> +I missed the answer to this question. +> +> +All of these changes are now in Linus' tree perhaps give that a try and +> +post the debug log again? +Hi Dan, + +I've moved onto looking at this one. +1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy that +up +at some stage), 1 switch, 4 downstream switch ports each with a type 3 + +I'm not getting a crash, but can't successfully setup a region. +Upon adding the final target +It's failing in check_last_peer() as pos < distance. +Seems distance is 4 which makes me think it's using the wrong level of the +heirarchy for +some reason or that distance check is wrong. +Wasn't a good idea to just skip that step though as it goes boom - though +stack trace is not useful. 
+ +Jonathan + +On Wed, 17 Aug 2022 17:16:19 +0100 +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: + +> +On Thu, 11 Aug 2022 17:46:55 -0700 +> +Dan Williams <dan.j.williams@intel.com> wrote: +> +> +> Dan Williams wrote: +> +> > Bobo WL wrote: +> +> > > Hi Dan, +> +> > > +> +> > > Thanks for your reply! +> +> > > +> +> > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> +> +> > > wrote: +> +> > > > +> +> > > > What is the output of: +> +> > > > +> +> > > > cxl list -MDTu -d decoder0.0 +> +> > > > +> +> > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > > > at least not in the specified order, or that validation check is +> +> > > > broken. +> +> > > +> +> > > Command "cxl list -MDTu -d decoder0.0" output: +> +> > +> +> > Thanks for this, I think I know the problem, but will try some +> +> > experiments with cxl_test first. +> +> +> +> Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +> reproducing the failure mode. This is the result of creating x4 region +> +> with devices directly attached to a single host-bridge: +> +> +> +> # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s +> +> $((1<<30)) +> +> { +> +> "region":"region8", +> +> "resource":"0xf1f0000000", +> +> "size":"1024.00 MiB (1073.74 MB)", +> +> "interleave_ways":4, +> +> "interleave_granularity":256, +> +> "decode_state":"commit", +> +> "mappings":[ +> +> { +> +> "position":3, +> +> "memdev":"mem11", +> +> "decoder":"decoder21.0" +> +> }, +> +> { +> +> "position":2, +> +> "memdev":"mem9", +> +> "decoder":"decoder19.0" +> +> }, +> +> { +> +> "position":1, +> +> "memdev":"mem10", +> +> "decoder":"decoder20.0" +> +> }, +> +> { +> +> "position":0, +> +> "memdev":"mem12", +> +> "decoder":"decoder22.0" +> +> } +> +> ] +> +> } +> +> cxl region: cmd_create_region: created 1 region +> +> +> +> > Did the commit_store() crash stop reproducing with latest cxl/preview +> +> > branch? +> +> +> +> I missed the answer to this question. +> +> +> +> All of these changes are now in Linus' tree perhaps give that a try and +> +> post the debug log again? +> +> +Hi Dan, +> +> +I've moved onto looking at this one. +> +1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy +> +that up +> +at some stage), 1 switch, 4 downstream switch ports each with a type 3 +> +> +I'm not getting a crash, but can't successfully setup a region. +> +Upon adding the final target +> +It's failing in check_last_peer() as pos < distance. +> +Seems distance is 4 which makes me think it's using the wrong level of the +> +heirarchy for +> +some reason or that distance check is wrong. +> +Wasn't a good idea to just skip that step though as it goes boom - though +> +stack trace is not useful. +Turns out really weird corruption happens if you accidentally back two type3 +devices +with the same memory device. Who would have thought it :) + +That aside ignoring the check_last_peer() failure seems to make everything work +for this +topology. I'm not seeing the crash, so my guess is we fixed it somewhere along +the way. + +Now for the fun one. I've replicated the crash if we have + +1HB 1*RP 1SW, 4SW-DSP, 4Type3 + +Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be +programmed +but the null pointer dereference isn't related to that. + +The bug is straight forward. Not all decoders have commit callbacks... Will +send out +a possible fix shortly. 
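
For illustration only (this is not the posted fix), the class of NULL-pointer bug described
above - "not all decoders have commit callbacks" - and the obvious guard, sketched as a
self-contained program with a hypothetical stand-in for the kernel's decoder type:

```c
#include <stdio.h>

/* Hypothetical stand-in for a CXL decoder whose commit op is optional,
 * as for a pass-through decoder that has nothing to program. */
struct decoder_stub {
    const char *name;
    int (*commit)(struct decoder_stub *d);
};

static int decoder_commit(struct decoder_stub *d)
{
    /* Guard the optional callback instead of calling through NULL;
     * calling d->commit(d) unconditionally is the "pc : 0x0" oops
     * seen in the crash log earlier in the thread. */
    if (!d->commit)
        return 0;
    return d->commit(d);
}

int main(void)
{
    struct decoder_stub passthrough = { "decoder1.0", NULL };

    printf("%s commit -> %d\n", passthrough.name,
           decoder_commit(&passthrough));
    return 0;
}
```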
+ +Jonathan + + + +> +> +Jonathan +> +> +> +> +> +> + +On Thu, 18 Aug 2022 17:37:40 +0100 +Jonathan Cameron via <qemu-devel@nongnu.org> wrote: + +> +On Wed, 17 Aug 2022 17:16:19 +0100 +> +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> +> On Thu, 11 Aug 2022 17:46:55 -0700 +> +> Dan Williams <dan.j.williams@intel.com> wrote: +> +> +> +> > Dan Williams wrote: +> +> > > Bobo WL wrote: +> +> > > > Hi Dan, +> +> > > > +> +> > > > Thanks for your reply! +> +> > > > +> +> > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> > > > <dan.j.williams@intel.com> wrote: +> +> > > > > +> +> > > > > What is the output of: +> +> > > > > +> +> > > > > cxl list -MDTu -d decoder0.0 +> +> > > > > +> +> > > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, +> +> > > > > or +> +> > > > > at least not in the specified order, or that validation check is +> +> > > > > broken. +> +> > > > +> +> > > > Command "cxl list -MDTu -d decoder0.0" output: +> +> > > +> +> > > Thanks for this, I think I know the problem, but will try some +> +> > > experiments with cxl_test first. +> +> > +> +> > Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +> > reproducing the failure mode. This is the result of creating x4 region +> +> > with devices directly attached to a single host-bridge: +> +> > +> +> > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s +> +> > $((1<<30)) +> +> > { +> +> > "region":"region8", +> +> > "resource":"0xf1f0000000", +> +> > "size":"1024.00 MiB (1073.74 MB)", +> +> > "interleave_ways":4, +> +> > "interleave_granularity":256, +> +> > "decode_state":"commit", +> +> > "mappings":[ +> +> > { +> +> > "position":3, +> +> > "memdev":"mem11", +> +> > "decoder":"decoder21.0" +> +> > }, +> +> > { +> +> > "position":2, +> +> > "memdev":"mem9", +> +> > "decoder":"decoder19.0" +> +> > }, +> +> > { +> +> > "position":1, +> +> > "memdev":"mem10", +> +> > "decoder":"decoder20.0" +> +> > }, +> +> > { +> +> > "position":0, +> +> > "memdev":"mem12", +> +> > "decoder":"decoder22.0" +> +> > } +> +> > ] +> +> > } +> +> > cxl region: cmd_create_region: created 1 region +> +> > +> +> > > Did the commit_store() crash stop reproducing with latest cxl/preview +> +> > > branch? +> +> > +> +> > I missed the answer to this question. +> +> > +> +> > All of these changes are now in Linus' tree perhaps give that a try and +> +> > post the debug log again? +> +> +> +> Hi Dan, +> +> +> +> I've moved onto looking at this one. +> +> 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy +> +> that up +> +> at some stage), 1 switch, 4 downstream switch ports each with a type 3 +> +> +> +> I'm not getting a crash, but can't successfully setup a region. +> +> Upon adding the final target +> +> It's failing in check_last_peer() as pos < distance. +> +> Seems distance is 4 which makes me think it's using the wrong level of the +> +> heirarchy for +> +> some reason or that distance check is wrong. +> +> Wasn't a good idea to just skip that step though as it goes boom - though +> +> stack trace is not useful. +> +> +Turns out really weird corruption happens if you accidentally back two type3 +> +devices +> +with the same memory device. Who would have thought it :) +> +> +That aside ignoring the check_last_peer() failure seems to make everything +> +work for this +> +topology. I'm not seeing the crash, so my guess is we fixed it somewhere +> +along the way. +> +> +Now for the fun one. 
I've replicated the crash if we have +> +> +1HB 1*RP 1SW, 4SW-DSP, 4Type3 +> +> +Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be +> +programmed +> +but the null pointer dereference isn't related to that. +> +> +The bug is straight forward. Not all decoders have commit callbacks... Will +> +send out +> +a possible fix shortly. +> +For completeness I'm carrying this hack because I haven't gotten my head +around the right fix for check_last_peer() failing on this test topology. + +diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c +index c49d9a5f1091..275e143bd748 100644 +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, + rc = check_last_peer(cxled, ep, cxl_rr, + distance); + if (rc) +- return rc; ++ // return rc; + goto out_target_set; + } + goto add_target; +-- + +I might find more bugs with more testing, but this is all the ones I've +seen so far + in Bobo's reports. Qemu fixes are now in upstream so +will be there in the release. + +As a reminder, testing on QEMU has a few corners... + +Need a patch to add serial number ECAP support. It is on list for revew, +but will have wait for after QEMU 7.1 release (which may be next week) + +QEMU still assumes HDM decoder on the host bridge will be programmed. +So if you want anything to work there should be at least +2 RP below the HB (no need to plug anything in to one of them). + +I don't want to add a commandline parameter to hide the decoder in QEMU +and detecting there is only one RP would require moving a bunch of static +stuff into runtime code (I think). + +I still think we should make the kernel check to see if there is a decoder, +but if not I might see how bad a hack it is to have QEMU ignore that decoder +if not committed in this one special case (HB HDM decoder with only one place +it can send stuff). Obviously that would be a break from specification +so less than idea! + +Thanks, + +Jonathan + +On Fri, 19 Aug 2022 09:46:55 +0100 +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: + +> +On Thu, 18 Aug 2022 17:37:40 +0100 +> +Jonathan Cameron via <qemu-devel@nongnu.org> wrote: +> +> +> On Wed, 17 Aug 2022 17:16:19 +0100 +> +> Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> +> +> > On Thu, 11 Aug 2022 17:46:55 -0700 +> +> > Dan Williams <dan.j.williams@intel.com> wrote: +> +> > +> +> > > Dan Williams wrote: +> +> > > > Bobo WL wrote: +> +> > > > > Hi Dan, +> +> > > > > +> +> > > > > Thanks for your reply! +> +> > > > > +> +> > > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> > > > > <dan.j.williams@intel.com> wrote: +> +> > > > > > +> +> > > > > > What is the output of: +> +> > > > > > +> +> > > > > > cxl list -MDTu -d decoder0.0 +> +> > > > > > +> +> > > > > > ...? It might be the case that mem1 cannot be mapped by +> +> > > > > > decoder0.0, or +> +> > > > > > at least not in the specified order, or that validation check is +> +> > > > > > broken. +> +> > > > > +> +> > > > > Command "cxl list -MDTu -d decoder0.0" output: +> +> > > > +> +> > > > Thanks for this, I think I know the problem, but will try some +> +> > > > experiments with cxl_test first. +> +> > > +> +> > > Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +> > > reproducing the failure mode. 
This is the result of creating x4 region +> +> > > with devices directly attached to a single host-bridge: +> +> > > +> +> > > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s +> +> > > $((1<<30)) +> +> > > { +> +> > > "region":"region8", +> +> > > "resource":"0xf1f0000000", +> +> > > "size":"1024.00 MiB (1073.74 MB)", +> +> > > "interleave_ways":4, +> +> > > "interleave_granularity":256, +> +> > > "decode_state":"commit", +> +> > > "mappings":[ +> +> > > { +> +> > > "position":3, +> +> > > "memdev":"mem11", +> +> > > "decoder":"decoder21.0" +> +> > > }, +> +> > > { +> +> > > "position":2, +> +> > > "memdev":"mem9", +> +> > > "decoder":"decoder19.0" +> +> > > }, +> +> > > { +> +> > > "position":1, +> +> > > "memdev":"mem10", +> +> > > "decoder":"decoder20.0" +> +> > > }, +> +> > > { +> +> > > "position":0, +> +> > > "memdev":"mem12", +> +> > > "decoder":"decoder22.0" +> +> > > } +> +> > > ] +> +> > > } +> +> > > cxl region: cmd_create_region: created 1 region +> +> > > +> +> > > > Did the commit_store() crash stop reproducing with latest cxl/preview +> +> > > > branch? +> +> > > +> +> > > I missed the answer to this question. +> +> > > +> +> > > All of these changes are now in Linus' tree perhaps give that a try and +> +> > > post the debug log again? +> +> > +> +> > Hi Dan, +> +> > +> +> > I've moved onto looking at this one. +> +> > 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy +> +> > that up +> +> > at some stage), 1 switch, 4 downstream switch ports each with a type 3 +> +> > +> +> > I'm not getting a crash, but can't successfully setup a region. +> +> > Upon adding the final target +> +> > It's failing in check_last_peer() as pos < distance. +> +> > Seems distance is 4 which makes me think it's using the wrong level of +> +> > the heirarchy for +> +> > some reason or that distance check is wrong. +> +> > Wasn't a good idea to just skip that step though as it goes boom - though +> +> > stack trace is not useful. +> +> +> +> Turns out really weird corruption happens if you accidentally back two +> +> type3 devices +> +> with the same memory device. Who would have thought it :) +> +> +> +> That aside ignoring the check_last_peer() failure seems to make everything +> +> work for this +> +> topology. I'm not seeing the crash, so my guess is we fixed it somewhere +> +> along the way. +> +> +> +> Now for the fun one. I've replicated the crash if we have +> +> +> +> 1HB 1*RP 1SW, 4SW-DSP, 4Type3 +> +> +> +> Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be +> +> programmed +> +> but the null pointer dereference isn't related to that. +> +> +> +> The bug is straight forward. Not all decoders have commit callbacks... +> +> Will send out +> +> a possible fix shortly. +> +> +> +For completeness I'm carrying this hack because I haven't gotten my head +> +around the right fix for check_last_peer() failing on this test topology. +> +> +diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c +> +index c49d9a5f1091..275e143bd748 100644 +> +--- a/drivers/cxl/core/region.c +> ++++ b/drivers/cxl/core/region.c +> +@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, +> +rc = check_last_peer(cxled, ep, cxl_rr, +> +distance); +> +if (rc) +> +- return rc; +> ++ // return rc; +> +goto out_target_set; +> +} +> +goto add_target; +I'm still carrying this hack and still haven't worked out the right fix. + +Suggestions welcome! If not I'll hopefully get some time on this +towards the end of the week. 
+ +Jonathan + diff --git a/results/classifier/zero-shot/003/KVM/42613410 b/results/classifier/zero-shot/003/KVM/42613410 new file mode 100644 index 000000000..662d0dfa1 --- /dev/null +++ b/results/classifier/zero-shot/003/KVM/42613410 @@ -0,0 +1,152 @@ +KVM: 0.381 +other: 0.332 +semantic: 0.327 +mistranslation: 0.314 +instruction: 0.307 +network: 0.284 +boot: 0.187 + +[Qemu-devel] [PATCH, Bug 1612908] scripts: Add TCP endpoints for qom-* scripts + +From: Carl Allendorph <address@hidden> + +I've created a patch for bug #1612908. The current docs for the scripts +in the "scripts/qmp/" directory suggest that both unix sockets and +tcp endpoints can be used. The TCP endpoints don't work for most of the +scripts, with notable exception of 'qmp-shell'. This patch attempts to +refactor the process of distinguishing between unix path endpoints and +tcp endpoints to work for all of these scripts. + +Carl Allendorph (1): + scripts: Add ability for qom-* python scripts to target tcp endpoints + + scripts/qmp/qmp-shell | 22 ++-------------------- + scripts/qmp/qmp.py | 23 ++++++++++++++++++++--- + 2 files changed, 22 insertions(+), 23 deletions(-) + +-- +2.7.4 + +From: Carl Allendorph <address@hidden> + +The current code for QEMUMonitorProtocol accepts both a unix socket +endpoint as a string and a tcp endpoint as a tuple. Most of the scripts +that use this class don't massage the command line argument to generate +a tuple. This patch refactors qmp-shell slightly to reuse the existing +parsing of the "host:port" string for all the qom-* scripts. + +Signed-off-by: Carl Allendorph <address@hidden> +--- + scripts/qmp/qmp-shell | 22 ++-------------------- + scripts/qmp/qmp.py | 23 ++++++++++++++++++++--- + 2 files changed, 22 insertions(+), 23 deletions(-) + +diff --git a/scripts/qmp/qmp-shell b/scripts/qmp/qmp-shell +index 0373b24..8a2a437 100755 +--- a/scripts/qmp/qmp-shell ++++ b/scripts/qmp/qmp-shell +@@ -83,9 +83,6 @@ class QMPCompleter(list): + class QMPShellError(Exception): + pass + +-class QMPShellBadPort(QMPShellError): +- pass +- + class FuzzyJSON(ast.NodeTransformer): + '''This extension of ast.NodeTransformer filters literal "true/false/null" + values in an AST and replaces them by proper "True/False/None" values that +@@ -103,28 +100,13 @@ class FuzzyJSON(ast.NodeTransformer): + # _execute_cmd()). Let's design a better one. + class QMPShell(qmp.QEMUMonitorProtocol): + def __init__(self, address, pretty=False): +- qmp.QEMUMonitorProtocol.__init__(self, self.__get_address(address)) ++ qmp.QEMUMonitorProtocol.__init__(self, address) + self._greeting = None + self._completer = None + self._pretty = pretty + self._transmode = False + self._actions = list() + +- def __get_address(self, arg): +- """ +- Figure out if the argument is in the port:host form, if it's not it's +- probably a file path. 
+- """ +- addr = arg.split(':') +- if len(addr) == 2: +- try: +- port = int(addr[1]) +- except ValueError: +- raise QMPShellBadPort +- return ( addr[0], port ) +- # socket path +- return arg +- + def _fill_completion(self): + for cmd in self.cmd('query-commands')['return']: + self._completer.append(cmd['name']) +@@ -400,7 +382,7 @@ def main(): + + if qemu is None: + fail_cmdline() +- except QMPShellBadPort: ++ except qmp.QMPShellBadPort: + die('bad port number in command-line') + + try: +diff --git a/scripts/qmp/qmp.py b/scripts/qmp/qmp.py +index 62d3651..261ece8 100644 +--- a/scripts/qmp/qmp.py ++++ b/scripts/qmp/qmp.py +@@ -25,21 +25,23 @@ class QMPCapabilitiesError(QMPError): + class QMPTimeoutError(QMPError): + pass + ++class QMPShellBadPort(QMPError): ++ pass ++ + class QEMUMonitorProtocol: + def __init__(self, address, server=False, debug=False): + """ + Create a QEMUMonitorProtocol class. + + @param address: QEMU address, can be either a unix socket path (string) +- or a tuple in the form ( address, port ) for a TCP +- connection ++ or a TCP endpoint (string in the format "host:port") + @param server: server mode listens on the socket (bool) + @raise socket.error on socket connection errors + @note No connection is established, this is done by the connect() or + accept() methods + """ + self.__events = [] +- self.__address = address ++ self.__address = self.__get_address(address) + self._debug = debug + self.__sock = self.__get_sock() + if server: +@@ -47,6 +49,21 @@ class QEMUMonitorProtocol: + self.__sock.bind(self.__address) + self.__sock.listen(1) + ++ def __get_address(self, arg): ++ """ ++ Figure out if the argument is in the port:host form, if it's not it's ++ probably a file path. ++ """ ++ addr = arg.split(':') ++ if len(addr) == 2: ++ try: ++ port = int(addr[1]) ++ except ValueError: ++ raise QMPShellBadPort ++ return ( addr[0], port ) ++ # socket path ++ return arg ++ + def __get_sock(self): + if isinstance(self.__address, tuple): + family = socket.AF_INET +-- +2.7.4 + diff --git a/results/classifier/zero-shot/003/KVM/43643137 b/results/classifier/zero-shot/003/KVM/43643137 new file mode 100644 index 000000000..27cf0401f --- /dev/null +++ b/results/classifier/zero-shot/003/KVM/43643137 @@ -0,0 +1,541 @@ +KVM: 0.794 +other: 0.781 +semantic: 0.764 +instruction: 0.754 +network: 0.709 +mistranslation: 0.665 +boot: 0.652 + +[Qemu-devel] [BUG/RFC] INIT IPI lost when VM starts + +Hi, +We encountered a problem that when a domain starts, seabios failed to online a +vCPU. + +After investigation, we found that the reason is in kvm-kmod, KVM_APIC_INIT bit +in +vcpu->arch.apic->pending_events was overwritten by qemu, and thus an INIT IPI +sent +to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp command +to qemu +on VM start. + +In qemu, qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from kvm-kmod and +sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus pending_events is +overwritten by qemu. + +I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true after +âquery-cpusâ, +and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am not sure +whether +it is OK for qemu to set cpu->kvm_vcpu_dirty in do_kvm_cpu_synchronize_state in +each caller. + +Whatâs your opinion? + +Let me clarify it more clearly. 
Time sequence is that qemu handles âquery-cpusâ qmp +command, vcpu 1 (and vcpu 0) got registers from kvm-kmod (qmp_query_cpus-> +cpu_synchronize_state-> kvm_cpu_synchronize_state-> +> do_kvm_cpu_synchronize_state-> kvm_arch_get_registers), then vcpu 0 (BSP) +sends INIT-SIPI to vcpu 1(AP). In kvm-kmod, vcpu 1âs pending_eventsâs KVM_APIC_INIT +bit set. +Then vcpu 1 continue running, vcpu1 thread in qemu calls +kvm_arch_put_registers-> kvm_put_vcpu_events, so KVM_APIC_INIT bit in vcpu 1âs +pending_events got cleared, i.e., lost. + +In kvm-kmod, except for pending_events, sipi_vector may also be overwritten., +so I am not sure if there are other fields/registers in danger, i.e., those may +be modified asynchronously with vcpu thread itself. + +BTW, using a sleep like following can reliably reproduce this problem, if VM +equipped with more than 2 vcpus and starting VM using libvirtd. + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 55865db..5099290 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2534,6 +2534,11 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) + KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; + } + ++ if (CPU(cpu)->cpu_index == 1) { ++ fprintf(stderr, "vcpu 1 sleep!!!!\n"); ++ sleep(10); ++ } ++ + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); + } + + +On 2017/3/20 22:21, Herongguang (Stephen) wrote: +Hi, +We encountered a problem that when a domain starts, seabios failed to online a +vCPU. + +After investigation, we found that the reason is in kvm-kmod, KVM_APIC_INIT bit +in +vcpu->arch.apic->pending_events was overwritten by qemu, and thus an INIT IPI +sent +to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp command +to qemu +on VM start. + +In qemu, qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from kvm-kmod and +sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus pending_events is +overwritten by qemu. + +I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true after +âquery-cpusâ, +and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am not sure +whether +it is OK for qemu to set cpu->kvm_vcpu_dirty in do_kvm_cpu_synchronize_state in +each caller. + +Whatâs your opinion? + +On 20/03/2017 15:21, Herongguang (Stephen) wrote: +> +> +We encountered a problem that when a domain starts, seabios failed to +> +online a vCPU. +> +> +After investigation, we found that the reason is in kvm-kmod, +> +KVM_APIC_INIT bit in +> +vcpu->arch.apic->pending_events was overwritten by qemu, and thus an +> +INIT IPI sent +> +to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp +> +command to qemu +> +on VM start. +> +> +In qemu, qmp_query_cpus-> cpu_synchronize_state-> +> +kvm_cpu_synchronize_state-> +> +do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from +> +kvm-kmod and +> +sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +> +kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus +> +pending_events is +> +overwritten by qemu. +> +> +I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true +> +after âquery-cpusâ, +> +and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am +> +not sure whether +> +it is OK for qemu to set cpu->kvm_vcpu_dirty in +> +do_kvm_cpu_synchronize_state in each caller. +> +> +Whatâs your opinion? 
+Hi Rongguang, + +sorry for the late response. + +Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear the +bit, but the result of the INIT is stored in mp_state. + +kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +it back. Maybe it should ignore events.smi.latched_init if not in SMM, +but I would like to understand the exact sequence of events. + +Thanks, + +paolo + +On 2017/4/6 0:16, Paolo Bonzini wrote: +On 20/03/2017 15:21, Herongguang (Stephen) wrote: +We encountered a problem that when a domain starts, seabios failed to +online a vCPU. + +After investigation, we found that the reason is in kvm-kmod, +KVM_APIC_INIT bit in +vcpu->arch.apic->pending_events was overwritten by qemu, and thus an +INIT IPI sent +to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp +command to qemu +on VM start. + +In qemu, qmp_query_cpus-> cpu_synchronize_state-> +kvm_cpu_synchronize_state-> +do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from +kvm-kmod and +sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus +pending_events is +overwritten by qemu. + +I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true +after âquery-cpusâ, +and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am +not sure whether +it is OK for qemu to set cpu->kvm_vcpu_dirty in +do_kvm_cpu_synchronize_state in each caller. + +Whatâs your opinion? +Hi Rongguang, + +sorry for the late response. + +Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear the +bit, but the result of the INIT is stored in mp_state. +It's dropped in KVM_SET_VCPU_EVENTS, see below. +kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +it back. Maybe it should ignore events.smi.latched_init if not in SMM, +but I would like to understand the exact sequence of events. +time0: +vcpu1: +qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +> do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to true)-> +kvm_arch_get_registers(KVM_APIC_INIT bit in vcpu->arch.apic->pending_events was not set) + +time1: +vcpu0: +send INIT-SIPI to all AP->(in vcpu 0's context)__apic_accept_irq(KVM_APIC_INIT bit +in vcpu1's arch.apic->pending_events is set) + +time2: +vcpu1: +kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is +true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten KVM_APIC_INIT bit in +vcpu->arch.apic->pending_events!) + +So it's a race between vcpu1 get/put registers with kvm/other vcpus changing +vcpu1's status/structure fields in the mean time, I am in worry of if there are +other fields may be overwritten, +sipi_vector is one. + +also see: +https://www.mail-archive.com/address@hidden/msg438675.html +Thanks, + +paolo + +. + +Hi Paolo, + +What's your opinion about this patch? We found it just before finishing patches +for the past two days. 
+ + +Thanks, +-Gonglei + + +> +-----Original Message----- +> +From: address@hidden [ +mailto:address@hidden +On +> +Behalf Of Herongguang (Stephen) +> +Sent: Thursday, April 06, 2017 9:47 AM +> +To: Paolo Bonzini; address@hidden; address@hidden; +> +address@hidden; address@hidden; address@hidden; +> +wangxin (U); Huangweidong (C) +> +Subject: Re: [BUG/RFC] INIT IPI lost when VM starts +> +> +> +> +On 2017/4/6 0:16, Paolo Bonzini wrote: +> +> +> +> On 20/03/2017 15:21, Herongguang (Stephen) wrote: +> +>> We encountered a problem that when a domain starts, seabios failed to +> +>> online a vCPU. +> +>> +> +>> After investigation, we found that the reason is in kvm-kmod, +> +>> KVM_APIC_INIT bit in +> +>> vcpu->arch.apic->pending_events was overwritten by qemu, and thus an +> +>> INIT IPI sent +> +>> to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp +> +>> command to qemu +> +>> on VM start. +> +>> +> +>> In qemu, qmp_query_cpus-> cpu_synchronize_state-> +> +>> kvm_cpu_synchronize_state-> +> +>> do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from +> +>> kvm-kmod and +> +>> sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +> +>> kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus +> +>> pending_events is +> +>> overwritten by qemu. +> +>> +> +>> I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true +> +>> after âquery-cpusâ, +> +>> and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am +> +>> not sure whether +> +>> it is OK for qemu to set cpu->kvm_vcpu_dirty in +> +>> do_kvm_cpu_synchronize_state in each caller. +> +>> +> +>> Whatâs your opinion? +> +> Hi Rongguang, +> +> +> +> sorry for the late response. +> +> +> +> Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear +> +the +> +> bit, but the result of the INIT is stored in mp_state. +> +> +It's dropped in KVM_SET_VCPU_EVENTS, see below. +> +> +> +> +> kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +> +> KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +> +> it back. Maybe it should ignore events.smi.latched_init if not in SMM, +> +> but I would like to understand the exact sequence of events. +> +> +time0: +> +vcpu1: +> +qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +> +> do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to +> +true)-> kvm_arch_get_registers(KVM_APIC_INIT bit in +> +vcpu->arch.apic->pending_events was not set) +> +> +time1: +> +vcpu0: +> +send INIT-SIPI to all AP->(in vcpu 0's +> +context)__apic_accept_irq(KVM_APIC_INIT bit in vcpu1's +> +arch.apic->pending_events is set) +> +> +time2: +> +vcpu1: +> +kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is +> +true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten +> +KVM_APIC_INIT bit in vcpu->arch.apic->pending_events!) +> +> +So it's a race between vcpu1 get/put registers with kvm/other vcpus changing +> +vcpu1's status/structure fields in the mean time, I am in worry of if there +> +are +> +other fields may be overwritten, +> +sipi_vector is one. +> +> +also see: +> +https://www.mail-archive.com/address@hidden/msg438675.html +> +> +> Thanks, +> +> +> +> paolo +> +> +> +> . +> +> +> + +2017-11-20 06:57+0000, Gonglei (Arei): +> +Hi Paolo, +> +> +What's your opinion about this patch? We found it just before finishing +> +patches +> +for the past two days. 
+I think your case was fixed by f4ef19108608 ("KVM: X86: Fix loss of +pending INIT due to race"), but that patch didn't fix it perfectly, so +maybe you're hitting a similar case that happens in SMM ... + +> +> -----Original Message----- +> +> From: address@hidden [ +mailto:address@hidden +On +> +> Behalf Of Herongguang (Stephen) +> +> On 2017/4/6 0:16, Paolo Bonzini wrote: +> +> > Hi Rongguang, +> +> > +> +> > sorry for the late response. +> +> > +> +> > Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear +> +> the +> +> > bit, but the result of the INIT is stored in mp_state. +> +> +> +> It's dropped in KVM_SET_VCPU_EVENTS, see below. +> +> +> +> > +> +> > kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +> +> > KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +> +> > it back. Maybe it should ignore events.smi.latched_init if not in SMM, +> +> > but I would like to understand the exact sequence of events. +> +> +> +> time0: +> +> vcpu1: +> +> qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +> +> > do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to +> +> true)-> kvm_arch_get_registers(KVM_APIC_INIT bit in +> +> vcpu->arch.apic->pending_events was not set) +> +> +> +> time1: +> +> vcpu0: +> +> send INIT-SIPI to all AP->(in vcpu 0's +> +> context)__apic_accept_irq(KVM_APIC_INIT bit in vcpu1's +> +> arch.apic->pending_events is set) +> +> +> +> time2: +> +> vcpu1: +> +> kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is +> +> true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten +> +> KVM_APIC_INIT bit in vcpu->arch.apic->pending_events!) +> +> +> +> So it's a race between vcpu1 get/put registers with kvm/other vcpus changing +> +> vcpu1's status/structure fields in the mean time, I am in worry of if there +> +> are +> +> other fields may be overwritten, +> +> sipi_vector is one. +Fields that can be asynchronously written by other VCPUs (like SIPI, +NMI) must not be SET if other VCPUs were not paused since the last GET. +(Looking at the interface, we can currently lose pending SMI.) + +INIT is one of the restricted fields, but the API unconditionally +couples SMM with latched INIT, which means that we can lose an INIT if +the VCPU is in SMM mode -- do you see SMM in kvm_vcpu_events? + +Thanks. + diff --git a/results/classifier/zero-shot/003/KVM/55961334 b/results/classifier/zero-shot/003/KVM/55961334 new file mode 100644 index 000000000..a097dd25a --- /dev/null +++ b/results/classifier/zero-shot/003/KVM/55961334 @@ -0,0 +1,42 @@ +KVM: 0.881 +instruction: 0.803 +semantic: 0.775 +mistranslation: 0.718 +other: 0.715 +network: 0.697 +boot: 0.569 + +[Bug] "-ht" flag ignored under KVM - guest still reports HT + +Hi Community, +We have observed that the 'ht' feature bit cannot be disabled when QEMU runs +with KVM acceleration. +qemu-system-x86_64 \ + --enable-kvm \ + -machine q35 \ + -cpu host,-ht \ + -smp 4 \ + -m 4G \ + -drive file=rootfs.img,format=raw \ + -nographic \ + -append 'console=ttyS0 root=/dev/sda rw' +Because '-ht' is specified, the guest should expose no HT capability +(cpuid.1.edx[28] = 0), and /proc/cpuinfo shouldn't show HT feature, but we still +saw ht in linux guest when run 'cat /proc/cpuinfo'. 
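
To confirm what the guest actually sees, independent of /proc/cpuinfo, one can read
CPUID.1:EDX[28] (the HTT bit referred to above) directly. A minimal sketch, using GCC's
cpuid.h on x86; if "-cpu host,-ht" were honoured, this would print 0:

```c
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* Leaf 1: feature flags; EDX bit 28 is HTT. */
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 1;
    printf("HTT (CPUID.1:EDX[28]) = %u\n", (edx >> 28) & 1);
    return 0;
}
```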
+XiaoYao mentioned that:
+
+It has been the behavior of QEMU since
+
+ commit 400281af34e5ee6aa9f5496b53d8f82c6fef9319
+ Author: Andre Przywara <andre.przywara@amd.com>
+ Date: Wed Aug 19 15:42:42 2009 +0200
+
+ set CPUID bits to present cores and threads topology
+
+that we cannot remove HT CPUID bit from guest via "-cpu xxx,-ht" if the
+VM has >= 2 vcpus.
+I'd like to know whether there's a plan to address this issue, or if the current
+behaviour is considered acceptable.
+Best regards,
+Ewan.
+
diff --git a/results/classifier/zero-shot/003/KVM/71456293 b/results/classifier/zero-shot/003/KVM/71456293
new file mode 100644
index 000000000..e8ded0bbc
--- /dev/null
+++ b/results/classifier/zero-shot/003/KVM/71456293
@@ -0,0 +1,1489 @@
+KVM: 0.691
+mistranslation: 0.659
+instruction: 0.624
+semantic: 0.600
+other: 0.598
+boot: 0.598
+network: 0.491
+
+[Qemu-devel][bug] qemu crash when migrate vm and vm's disks
+
+When migrating a vm and the vm's disks, the target host qemu crashes due to an invalid free.
+#0 object_unref (obj=0x1000) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
+#1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
+at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
+#2 flatview_destroy (view=0x560439653880) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
+#3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
+at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
+#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
+#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
+Tested on base qemu-2.12.0,
+but it also reproduces with the latest qemu (v6.0.0-rc2).
+The following patch can resolve this problem:
+https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+Steps to reproduce:
+(1) Create VM (virsh define)
+(2) Add 64 virtio scsi disks
+(3) migrate vm and vm's disks
+-------------------------------------------------------------------------------------------------------------------------------------
+This e-mail and its attachments contain confidential information from New H3C, which is
+intended only for the person or entity whose address is listed above. Any use of the
+information contained herein in any way (including, but not limited to, total or partial
+disclosure, reproduction, or dissemination) by persons other than the intended
+recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender
+by phone or email immediately and delete it!
+
+* Yuchen (yu.chen@h3c.com) wrote:
+>
+When migrating a vm and the vm's disks, the target host qemu crashes due to an invalid free.
+>
+>
+#0 object_unref (obj=0x1000) at
+>
+/qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
+>
+#1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
+>
+at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
+>
+#2 flatview_destroy (view=0x560439653880) at
+>
+/qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
+>
+#3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
+>
+at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
+>
+#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
+>
+#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
+>
+>
+Tested on base qemu-2.12.0, but it also reproduces with the latest qemu (v6.0.0-rc2).
Interesting.
+
+>
+The following patch can resolve this problem:
+>
+https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+That's a pci/rcu change; ccing Paolo and Michael.
+
+>
+Steps to reproduce:
+>
+(1) Create VM (virsh define)
+>
+(2) Add 64 virtio scsi disks
+Is that hot adding the disks later, or are they included in the VM at
+creation?
+Can you provide a libvirt XML example?
+
+>
+(3) migrate vm and vm's disks
+What do you mean by 'and vm disks' - are you doing a block migration?
+
+Dave
+
+--
+Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
+
+>
+-----Original Message-----
+>
+From: Dr. David Alan Gilbert [
mailto:dgilbert@redhat.com
]
+>
+Sent: April 8, 2021 19:27
+>
+To: yuchen (Cloud) <yu.chen@h3c.com>; pbonzini@redhat.com;
+>
+mst@redhat.com
+>
+Cc: qemu-devel@nongnu.org
+>
+Subject: Re: [Qemu-devel][bug] qemu crash when migrate vm and vm's disks
+>
+>
+* Yuchen (yu.chen@h3c.com) wrote:
+>
+> When migrating a vm and the vm's disks, the target host qemu crashes due to an invalid
+>
+free.
+>
+>
+>
+> #0 object_unref (obj=0x1000) at
+>
+> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
+>
+> #1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
+>
+> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
+>
+> #2 flatview_destroy (view=0x560439653880) at
+>
+> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
+>
+> #3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
+>
+> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
+>
+> #4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
+>
+> #5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
+>
+>
+>
+> Tested on base qemu-2.12.0, but it also reproduces with the latest qemu (v6.0.0-rc2).
+>
+Interesting.
+>
+> The following patch can resolve this problem:
+>
+>
+https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+>
+>
+That's a pci/rcu change; ccing Paolo and Michael.
+>
+>
+> Steps to reproduce:
+>
+> (1) Create VM (virsh define)
+>
+> (2) Add 64 virtio scsi disks
+>
+>
+Is that hot adding the disks later, or are they included in the VM at
+>
+creation?
+>
+Can you provide a libvirt XML example?
+> +Include disks in the VM at creation + +vm disks xml (only virtio scsi disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='sda' bus='scsi'/> + <address type='drive' controller='2' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='sdb' bus='scsi'/> + <address type='drive' controller='3' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='sdc' bus='scsi'/> + <address type='drive' controller='4' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='sdd' bus='scsi'/> + <address type='drive' controller='5' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='sde' bus='scsi'/> + <address type='drive' controller='6' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='sdf' bus='scsi'/> + <address type='drive' controller='7' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='sdg' bus='scsi'/> + <address type='drive' controller='8' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='sdh' bus='scsi'/> + <address type='drive' controller='9' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='sdi' bus='scsi'/> + <address type='drive' controller='10' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='sdj' bus='scsi'/> + <address type='drive' controller='11' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='sdk' bus='scsi'/> + <address type='drive' controller='12' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='sdl' bus='scsi'/> + <address 
type='drive' controller='13' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='sdm' bus='scsi'/> + <address type='drive' controller='14' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='sdn' bus='scsi'/> + <address type='drive' controller='15' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='sdo' bus='scsi'/> + <address type='drive' controller='16' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='sdp' bus='scsi'/> + <address type='drive' controller='17' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='sdq' bus='scsi'/> + <address type='drive' controller='18' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='sdr' bus='scsi'/> + <address type='drive' controller='19' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='sds' bus='scsi'/> + <address type='drive' controller='20' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='sdt' bus='scsi'/> + <address type='drive' controller='21' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='sdu' bus='scsi'/> + <address type='drive' controller='22' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='sdv' bus='scsi'/> + <address type='drive' controller='23' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='sdw' bus='scsi'/> + <address type='drive' controller='24' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='sdx' bus='scsi'/> + <address type='drive' controller='25' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='sdy' bus='scsi'/> + <address 
type='drive' controller='26' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='sdz' bus='scsi'/> + <address type='drive' controller='27' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='sdaa' bus='scsi'/> + <address type='drive' controller='28' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='sdab' bus='scsi'/> + <address type='drive' controller='29' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='sdac' bus='scsi'/> + <address type='drive' controller='30' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='sdad' bus='scsi'/> + <address type='drive' controller='31' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='sdae' bus='scsi'/> + <address type='drive' controller='32' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='sdaf' bus='scsi'/> + <address type='drive' controller='33' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='sdag' bus='scsi'/> + <address type='drive' controller='34' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='sdah' bus='scsi'/> + <address type='drive' controller='35' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='sdai' bus='scsi'/> + <address type='drive' controller='36' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='sdaj' bus='scsi'/> + <address type='drive' controller='37' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='sdak' bus='scsi'/> + <address type='drive' controller='38' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='sdal' bus='scsi'/> + 
<address type='drive' controller='39' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='sdam' bus='scsi'/> + <address type='drive' controller='40' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='sdan' bus='scsi'/> + <address type='drive' controller='41' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='sdao' bus='scsi'/> + <address type='drive' controller='42' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='sdap' bus='scsi'/> + <address type='drive' controller='43' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data43'/> + <target dev='sdaq' bus='scsi'/> + <address type='drive' controller='44' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='sdar' bus='scsi'/> + <address type='drive' controller='45' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='sdas' bus='scsi'/> + <address type='drive' controller='46' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='sdat' bus='scsi'/> + <address type='drive' controller='47' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='sdau' bus='scsi'/> + <address type='drive' controller='48' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='sdav' bus='scsi'/> + <address type='drive' controller='49' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='sdaw' bus='scsi'/> + <address type='drive' controller='50' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='sdax' bus='scsi'/> + <address type='drive' controller='51' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='sday' 
bus='scsi'/> + <address type='drive' controller='52' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='sdaz' bus='scsi'/> + <address type='drive' controller='53' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='sdba' bus='scsi'/> + <address type='drive' controller='54' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='sdbb' bus='scsi'/> + <address type='drive' controller='55' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='sdbc' bus='scsi'/> + <address type='drive' controller='56' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='sdbd' bus='scsi'/> + <address type='drive' controller='57' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='sdbe' bus='scsi'/> + <address type='drive' controller='58' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='sdbf' bus='scsi'/> + <address type='drive' controller='59' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='sdbg' bus='scsi'/> + <address type='drive' controller='60' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='sdbh' bus='scsi'/> + <address type='drive' controller='61' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='sdbi' bus='scsi'/> + <address type='drive' controller='62' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='sdbj' bus='scsi'/> + <address type='drive' controller='63' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='sdbk' bus='scsi'/> + <address type='drive' controller='64' bus='0' target='0' unit='0'/> + </disk> + <controller type='scsi' index='0'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='1' 
model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='2' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='3' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='4' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='5' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='6' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='7' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='8' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='9' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='10' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='11' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='12' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='13' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='14' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='15' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='16' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='17' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='18' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='19' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='20' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='21' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='22' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='23' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='24' model='virtio-scsi'> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='25' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='26' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='27' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='28' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='29' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </controller> + <controller type='scsi' index='30' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </controller> + <controller type='scsi' index='31' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='32' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='33' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='34' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='35' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='36' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='37' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='38' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='39' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='40' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='41' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='42' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='43' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='44' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='45' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='46' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='47' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' 
slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='48' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='49' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='50' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='51' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='52' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='53' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='54' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='55' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='56' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='57' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='58' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='59' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='60' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='61' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='62' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='63' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='64' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <controller type='pci' index='1' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' +function='0x0'/> + </controller> + <controller type='pci' index='2' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='2'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1f' +function='0x0'/> + </controller> + </devices> + +vm disks xml (only virtio disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' 
+discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='vdb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='vdc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='vdd' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='vde' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='vdf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='vdg' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='vdh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='vdi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='vdj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='vdk' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='vdl' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='vdm' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='vdn' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x15' 
+function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='vdo' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='vdp' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='vdq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='vdr' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='vds' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='vdt' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='vdu' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='vdv' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='vdw' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='vdx' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='vdy' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='vdz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' 
+discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='vdaa' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='vdab' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='vdac' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='vdad' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='vdae' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='vdaf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='vdag' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='vdah' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='vdai' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='vdaj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='vdak' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='vdal' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='vdam' bus='virtio'/> + <address type='pci' domain='0x0000' 
bus='0x01' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='vdan' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='vdao' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='vdap' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data43'/> + <target dev='vdaq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='vdar' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='vdas' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='vdat' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='vdau' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='vdav' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='vdaw' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='vdax' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='vday' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' 
cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='vdaz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='vdba' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='vdbb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='vdbc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='vdbd' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='vdbe' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='vdbf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='vdbg' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='vdbh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='vdbi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='vdbj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='vdbk' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='vdbl' bus='virtio'/> + 
<address type='pci' domain='0x0000' bus='0x00' slot='0x03'
+function='0x0'/>
+ </disk>
+ <controller type='pci' index='0' model='pci-root'/>
+ <controller type='pci' index='1' model='pci-bridge'>
+ <model name='pci-bridge'/>
+ <target chassisNr='1'/>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x1f'
+function='0x0'/>
+ </controller>
+ <controller type='pci' index='2' model='pci-bridge'>
+ <model name='pci-bridge'/>
+ <target chassisNr='2'/>
+ <address type='pci' domain='0x0000' bus='0x01' slot='0x1f'
+function='0x0'/>
+ </controller>
+ </devices>
+
+> > (3) Migrate the VM and its disks
+>
+> What do you mean by 'and vm disks' - are you doing a block migration?
+
+Yes, block migration.
+In fact, migrating only the domain (without block migration) also
+reproduces it.
+
+> Dave
+
+> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

diff --git a/results/classifier/zero-shot/003/KVM/80615920 b/results/classifier/zero-shot/003/KVM/80615920
new file mode 100644
index 000000000..f6b9b5c9c
--- /dev/null
+++ b/results/classifier/zero-shot/003/KVM/80615920
@@ -0,0 +1,351 @@
+KVM: 0.803
+mistranslation: 0.800
+other: 0.786
+instruction: 0.751
+boot: 0.750
+semantic: 0.737
+network: 0.732
+
+[BUG] accel/tcg: cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu)
+
+It seems there is a bug in SIGALRM handling when a 486 system emulates
+x86_64 code.
+
+This code:
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+
+pthread_t thread1, thread2;
+
+// Signal handler for SIGALRM
+void alarm_handler(int sig) {
+    // Do nothing, just wake up the other thread
+}
+
+// Thread 1 function
+void* thread1_func(void* arg) {
+    // Set up the signal handler for SIGALRM
+    signal(SIGALRM, alarm_handler);
+
+    // Wait for 1 second
+    sleep(1);
+
+    // Send SIGALRM signal to thread 2
+    pthread_kill(thread2, SIGALRM);
+
+    return NULL;
+}
+
+// Thread 2 function
+void* thread2_func(void* arg) {
+    // Wait for the SIGALRM signal
+    pause();
+
+    printf("Thread 2 woke up!\n");
+
+    return NULL;
+}
+
+int main() {
+    // Create thread 1
+    if (pthread_create(&thread1, NULL, thread1_func, NULL) != 0) {
+        fprintf(stderr, "Failed to create thread 1\n");
+        return 1;
+    }
+
+    // Create thread 2
+    if (pthread_create(&thread2, NULL, thread2_func, NULL) != 0) {
+        fprintf(stderr, "Failed to create thread 2\n");
+        return 1;
+    }
+
+    // Wait for both threads to finish
+    pthread_join(thread1, NULL);
+    pthread_join(thread2, NULL);
+
+    return 0;
+}
+
+It fails with this -strace log (there are also unsupported syscalls 334 and
+435, but they don't seem to affect the code much):
+
+...
+736 rt_sigaction(SIGALRM,0x000000001123ec20,0x000000001123ecc0) = 0
+736 clock_nanosleep(CLOCK_REALTIME,0,{tv_sec = 1,tv_nsec = 0},{tv_sec =
+1,tv_nsec = 0})
+736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x0000000010800b38,8) = 0
+736 Unknown syscall 435
+736
+clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|
+ ...
+736 rt_sigprocmask(SIG_SETMASK,0x0000000010800b38,NULL,8)
+736 set_robust_list(0x11a419a0,0) = -1 errno=38 (Function not implemented)
+736 rt_sigprocmask(SIG_SETMASK,0x0000000011a41fb0,NULL,8) = 0
+ = 0
+736 pause(0,0,2,277186368,0,295966400)
+736
+futex(0x000000001123f990,FUTEX_CLOCK_REALTIME|FUTEX_WAIT_BITSET,738,NULL,NULL,0)
+ = 0
+736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x000000001123ee88,8) = 0
+736 getpid() = 736
+736 tgkill(736,739,SIGALRM) = 0
+ = -1 errno=4 (Interrupted system call)
+--- SIGALRM {si_signo=SIGALRM, si_code=SI_TKILL, si_pid=736, si_uid=0} ---
+0x48874a != 0x3c69e10
+736 rt_sigprocmask(SIG_SETMASK,0x000000001123ee88,NULL,8) = 0
+**
+ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
+(cpu == current_cpu)
+Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+failed: (cpu == current_cpu)
+0x48874a != 0x3c69e10
+**
+ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
+(cpu == current_cpu)
+Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+failed: (cpu == current_cpu)
+#
+
+The code fails either with or without -singlestep; the command line:
+
+/usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin
+
+The source code of QEMU 8.1.1 was modified with the patch "[PATCH]
+qemu/timer: Don't use RDTSC on i486" [1], with a few ioctls added (not
+relevant), and cpu_exec_longjmp_cleanup() now prints the current pointers
+of cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
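+
+For context: current_cpu is thread-local in QEMU, so the assertion firing
+means the longjmp cleanup ran with a cpu pointer that does not match the
+executing thread's own state. A self-contained sketch of how thread-local
+data looks from a signal handler delivered with pthread_kill() (plain C
+with hypothetical names, not QEMU code):
+
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static __thread int my_id;              /* per-thread, like current_cpu */
+static volatile sig_atomic_t seen_id;
+
+static void handler(int sig)
+{
+    (void)sig;
+    seen_id = my_id;                    /* reads the TARGET thread's TLS */
+}
+
+static void *worker(void *arg)
+{
+    (void)arg;
+    my_id = 2;
+    pause();                            /* returns after the signal */
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t t;
+
+    signal(SIGALRM, handler);
+    my_id = 1;
+    pthread_create(&t, NULL, worker, NULL);
+    sleep(1);                           /* crude: let worker reach pause() */
+    pthread_kill(t, SIGALRM);           /* handler runs on the worker */
+    pthread_join(t, NULL);
+    printf("handler saw my_id = %d\n", (int)seen_id);  /* prints 2 */
+    return 0;
+}
+
+The handler observes the state of whichever thread the signal lands on; if
+QEMU's cleanup path instead sees a cpu belonging to a different thread, the
+assertion above trips.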
+
+config.log (built as a part of buildroot, basically the minimal possible
+configuration for running x86_64 on a 486):
+
+# Configured with:
+'/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/build/qemu-8.1.1/configure'
+'--prefix=/usr'
+'--cross-prefix=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/i486-buildroot-linux-gnu-'
+'--audio-drv-list='
+'--python=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/python3'
+'--ninja=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/ninja'
+'--disable-alsa' '--disable-bpf' '--disable-brlapi' '--disable-bsd-user'
+'--disable-cap-ng' '--disable-capstone' '--disable-containers'
+'--disable-coreaudio' '--disable-curl' '--disable-curses'
+'--disable-dbus-display' '--disable-docs' '--disable-dsound' '--disable-hvf'
+'--disable-jack' '--disable-libiscsi' '--disable-linux-aio'
+'--disable-linux-io-uring' '--disable-malloc-trim' '--disable-membarrier'
+'--disable-mpath' '--disable-netmap' '--disable-opengl' '--disable-oss'
+'--disable-pa' '--disable-rbd' '--disable-sanitizers' '--disable-selinux'
+'--disable-sparse' '--disable-strip' '--disable-vde' '--disable-vhost-crypto'
+'--disable-vhost-user-blk-server' '--disable-virtfs' '--disable-whpx'
+'--disable-xen' '--disable-attr' '--disable-kvm' '--disable-vhost-net'
+'--disable-download' '--disable-hexagon-idef-parser' '--disable-system'
+'--enable-linux-user' '--target-list=x86_64-linux-user' '--disable-vhost-user'
+'--disable-slirp' '--disable-sdl' '--disable-fdt' '--enable-trace-backends=nop'
+'--disable-tools' '--disable-guest-agent' '--disable-fuse'
+'--disable-fuse-lseek' '--disable-seccomp' '--disable-libssh'
+'--disable-libusb' '--disable-vnc' '--disable-nettle' '--disable-numa'
+'--disable-pipewire' '--disable-spice' '--disable-usb-redir'
+'--disable-install-blobs'
+
+Emulation of the same x86_64 code with qemu 6.2.0 installed on another
+x86_64 native machine works fine.
+
+[1]
+https://lists.nongnu.org/archive/html/qemu-devel/2023-11/msg05387.html
+
+Best regards,
+Petr
+
+On Sat, 25 Nov 2023 at 13:09, Petr Cvek <petrcvekcz@gmail.com> wrote:
+>
+> It seems there is a bug in SIGALRM handling when a 486 system emulates
+> x86_64 code.
+
+A 486 host is pretty well out of support currently. Can you reproduce
+this on a less ancient host CPU type?
+
+> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
+> (cpu == current_cpu)
+> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> assertion failed: (cpu == current_cpu)
+> 0x48874a != 0x3c69e10
+> **
+> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
+> (cpu == current_cpu)
+> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> assertion failed: (cpu == current_cpu)
+
+What compiler version do you build QEMU with? That
+assert is there because we have seen some buggy compilers
+in the past which don't correctly preserve the variable
+value as the setjmp/longjmp spec requires them to.
+
+thanks
+-- PMM
+
+On 27. 11. 2023 at 10:37, Peter Maydell wrote:
+> On Sat, 25 Nov 2023 at 13:09, Petr Cvek <petrcvekcz@gmail.com> wrote:
+> >
+> > It seems there is a bug in SIGALRM handling when a 486 system emulates
+> > x86_64 code.
+>
+> A 486 host is pretty well out of support currently. Can you reproduce
+> this on a less ancient host CPU type?
+
+It seems it only fails when the code is compiled for i486.
QEMU built with the
+same compiler with -march=i586 and above runs on the same physical hardware
+without a problem. All -march= variants were executed on a Ryzen 3600.
+
+> > ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+> > failed: (cpu == current_cpu)
+> > Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> > assertion failed: (cpu == current_cpu)
+> > 0x48874a != 0x3c69e10
+> > **
+> > ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+> > failed: (cpu == current_cpu)
+> > Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> > assertion failed: (cpu == current_cpu)
+>
+> What compiler version do you build QEMU with? That
+> assert is there because we have seen some buggy compilers
+> in the past which don't correctly preserve the variable
+> value as the setjmp/longjmp spec requires them to.
+
+The i486 and i586+ code variants were compiled with GCC 13.2.0 (more
+exactly, the slackware64 current multilib distribution).
+
+The i486 binary which runs on the real 486 was also built with GCC 13.2.0,
+installed as a part of the buildroot cross-compiler (about a two-week-old
+git snapshot).
+
+> thanks
+> -- PMM
+
+best regards,
+Petr
+
+On 11/25/23 07:08, Petr Cvek wrote:
+> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
+> (cpu == current_cpu)
+> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> assertion failed: (cpu == current_cpu)
+> #
+>
+> The code fails either with or without -singlestep; the command line:
+>
+> /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin
+>
+> The source code of QEMU 8.1.1 was modified with the patch "[PATCH]
+> qemu/timer: Don't use RDTSC on i486" [1], with a few ioctls added (not
+> relevant), and cpu_exec_longjmp_cleanup() now prints the current pointers
+> of cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
+
+If you try this again with 8.2-rc2, you should not see an assertion failure.
+You should see instead
+
+QEMU internal SIGILL {code=ILLOPC, addr=0x12345678}
+
+which I think more accurately summarizes the situation of attempting RDTSC
+on hardware that does not support it.
+
+r~
+
+On 29. 11. 2023 at 15:25, Richard Henderson wrote:
+> On 11/25/23 07:08, Petr Cvek wrote:
+> > ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
+> > failed: (cpu == current_cpu)
+> > Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
+> > assertion failed: (cpu == current_cpu)
+> > #
+> >
+> > The code fails either with or without -singlestep; the command line:
+> >
+> > /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep
+> > /opt/x86_64/alarm.bin
+> >
+> > The source code of QEMU 8.1.1 was modified with the patch "[PATCH]
+> > qemu/timer: Don't use RDTSC on i486" [1], with a few ioctls added (not
+> > relevant), and cpu_exec_longjmp_cleanup() now prints the current
+> > pointers of cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
+>
+> If you try this again with 8.2-rc2, you should not see an assertion failure.
+> You should see instead
+>
+> QEMU internal SIGILL {code=ILLOPC, addr=0x12345678}
+>
+> which I think more accurately summarizes the situation of attempting RDTSC
+> on hardware that does not support it.
+
+Compiling vanilla QEMU v8.2.0-rc2 with -march=i486 using GCC 13.2.0 and
+running the resulting binary on the Ryzen still leads to:
+
+**
+ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion failed:
+(cpu == current_cpu)
+Bail out! ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup:
+assertion failed: (cpu == current_cpu)
+Aborted
+
+> r~
+
+Petr
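+
+Peter's note about compilers not preserving variable values across
+setjmp/longjmp refers to C11 7.13.2.1p3: automatic variables that are
+modified between setjmp() and longjmp() have indeterminate values after the
+jump unless they are declared volatile, which is what the cpu == current_cpu
+assertion is guarding against. A minimal standalone illustration (a sketch
+of the language rule, not QEMU code):
+
+#include <setjmp.h>
+#include <stdio.h>
+
+static jmp_buf env;
+
+int main(void)
+{
+    /* Without 'volatile', x would have an indeterminate value after
+     * longjmp(), because it is modified between setjmp() and longjmp(). */
+    volatile int x = 1;
+
+    if (setjmp(env) == 0) {
+        x = 2;              /* modified between setjmp and longjmp */
+        longjmp(env, 1);
+    }
+
+    printf("x = %d\n", x);  /* prints 2, guaranteed only because x is volatile */
+    return 0;
+}
+
+A compiler that miscompiles this pattern, or QEMU's equivalent where the
+cpu pointer is expected to survive the longjmp, would explain an assertion
+like the one above firing only at a particular -march level.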