Diffstat (limited to 'classification_output/03/KVM')
| -rw-r--r-- | classification_output/03/KVM/04472277 | 579 |
| -rw-r--r-- | classification_output/03/KVM/26430026 | 168 |
| -rw-r--r-- | classification_output/03/KVM/33802194 | 4942 |
| -rw-r--r-- | classification_output/03/KVM/42613410 | 152 |
| -rw-r--r-- | classification_output/03/KVM/43643137 | 541 |
| -rw-r--r-- | classification_output/03/KVM/55961334 | 42 |
| -rw-r--r-- | classification_output/03/KVM/71456293 | 1489 |
| -rw-r--r-- | classification_output/03/KVM/80615920 | 351 |
8 files changed, 0 insertions, 8264 deletions
diff --git a/classification_output/03/KVM/04472277 b/classification_output/03/KVM/04472277
deleted file mode 100644
index a8ac3b596..000000000
--- a/classification_output/03/KVM/04472277
+++ /dev/null
@@ -1,579 +0,0 @@
-KVM: 0.890
-network: 0.847
-other: 0.846
-instruction: 0.845
-boot: 0.831
-mistranslation: 0.817
-semantic: 0.815
-
-[BUG][KVM_SET_USER_MEMORY_REGION] KVM_SET_USER_MEMORY_REGION failed
-
-Hi all,
-I started a VM in OpenStack, which uses libvirt to start the QEMU VM, but the log now shows the ERROR below.
-Does anyone know what causes this?
-The ERROR log from /var/log/libvirt/qemu/instance-0000000e.log
-```
-2023-03-14T10:09:17.674114Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument
-kvm_set_phys_mem: error registering slot: Invalid argument
-2023-03-14 10:09:18.198+0000: shutting down, reason=crashed
-```
-The xml file
-```
-root@c1c2:~# cat /etc/libvirt/qemu/instance-0000000e.xml
-<!--
-WARNING: THIS IS AN AUTO-GENERATED FILE. CHANGES TO IT ARE LIKELY TO BE
-OVERWRITTEN AND LOST. Changes to this xml configuration should be made using:
- virsh edit instance-0000000e
-or other application using the libvirt API.
--->
-<domain type='kvm'>
- <name>instance-0000000e</name>
- <uuid>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</uuid>
- <metadata>
-  <nova:instance xmlns:nova="http://openstack.org/xmlns/libvirt/nova/1.1">
-   <nova:package version="25.1.0"/>
-   <nova:name>provider-instance</nova:name>
-   <nova:creationTime>2023-03-14 10:09:13</nova:creationTime>
-   <nova:flavor name="cirros-os-dpu-test-1">
-    <nova:memory>64</nova:memory>
-    <nova:disk>1</nova:disk>
-    <nova:swap>0</nova:swap>
-    <nova:ephemeral>0</nova:ephemeral>
-    <nova:vcpus>1</nova:vcpus>
-   </nova:flavor>
-   <nova:owner>
-    <nova:user uuid="ff627ad39ed94479b9c5033bc462cf78">admin</nova:user>
-    <nova:project uuid="512866f9994f4ad8916d8539a7cdeec9">admin</nova:project>
-   </nova:owner>
-   <nova:root type="image" uuid="9e58cb69-316a-4093-9f23-c1d1bd8edffe"/>
-   <nova:ports>
-    <nova:port uuid="77c1dc00-af39-4463-bea0-12808f4bc340">
-     <nova:ip type="fixed" address="172.1.1.43" ipVersion="4"/>
-    </nova:port>
-   </nova:ports>
-  </nova:instance>
- </metadata>
- <memory unit='KiB'>65536</memory>
- <currentMemory unit='KiB'>65536</currentMemory>
- <vcpu placement='static'>1</vcpu>
- <sysinfo type='smbios'>
-  <system>
-   <entry name='manufacturer'>OpenStack Foundation</entry>
-   <entry name='product'>OpenStack Nova</entry>
-   <entry name='version'>25.1.0</entry>
-   <entry name='serial'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry>
-   <entry name='uuid'>ff91d2dc-69a1-43ef-abde-c9e4e9a0305b</entry>
-   <entry name='family'>Virtual Machine</entry>
-  </system>
- </sysinfo>
- <os>
-  <type arch='x86_64' machine='pc-i440fx-6.2'>hvm</type>
-  <boot dev='hd'/>
-  <smbios mode='sysinfo'/>
- </os>
- <features>
-  <acpi/>
-  <apic/>
-  <vmcoreinfo state='on'/>
- </features>
- <cpu mode='host-model' check='partial'>
-  <topology sockets='1' dies='1' cores='1' threads='1'/>
- </cpu>
- <clock offset='utc'>
-  <timer name='pit' tickpolicy='delay'/>
-  <timer name='rtc' tickpolicy='catchup'/>
-  <timer name='hpet' present='no'/>
- </clock>
- <on_poweroff>destroy</on_poweroff>
- <on_reboot>restart</on_reboot>
- <on_crash>destroy</on_crash>
- <devices>
-  <emulator>/usr/bin/qemu-system-x86_64</emulator>
-  <disk type='file' device='disk'>
-   <driver name='qemu' type='qcow2' cache='none'/>
-   <source file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk'/>
-   <target dev='vda' bus='virtio'/>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
-  </disk>
-  <controller type='usb' index='0' model='piix3-uhci'>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
-  </controller>
-  <controller type='pci' index='0' model='pci-root'/>
-  <interface type='hostdev' managed='yes'>
-   <mac address='fa:16:3e:aa:d9:23'/>
-   <source>
-    <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x5'/>
-   </source>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
-  </interface>
-  <serial type='pty'>
-   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/>
-   <target type='isa-serial' port='0'>
-    <model name='isa-serial'/>
-   </target>
-  </serial>
-  <console type='pty'>
-   <log file='/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/console.log' append='off'/>
-   <target type='serial' port='0'/>
-  </console>
-  <input type='tablet' bus='usb'>
-   <address type='usb' bus='0' port='1'/>
-  </input>
-  <input type='mouse' bus='ps2'/>
-  <input type='keyboard' bus='ps2'/>
-  <graphics type='vnc' port='-1' autoport='yes' listen='0.0.0.0'>
-   <listen type='address' address='0.0.0.0'/>
-  </graphics>
-  <audio id='1' type='none'/>
-  <video>
-   <model type='virtio' heads='1' primary='yes'/>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
-  </video>
-  <hostdev mode='subsystem' type='pci' managed='yes'>
-   <source>
-    <address domain='0x0000' bus='0x01' slot='0x00' function='0x6'/>
-   </source>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
-  </hostdev>
-  <memballoon model='virtio'>
-   <stats period='10'/>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/>
-  </memballoon>
-  <rng model='virtio'>
-   <backend model='random'>/dev/urandom</backend>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/>
-  </rng>
- </devices>
-</domain>
-```
-----
-Simon Jones
-
-This happened on Ubuntu 22.04.
-QEMU was installed by apt like this:
-apt install -y qemu qemu-kvm qemu-system
-and the QEMU version is 6.2.0
-----
-Simon Jones
-Simon Jones <batmanustc@gmail.com> wrote on Tue, 21 Mar 2023 at 08:40:
-[original bug report quoted in full; trimmed here as a verbatim duplicate]
-
-This is the full ERROR log
-2023-03-23 08:00:52.362+0000: starting up libvirt version: 8.0.0, package: 1ubuntu7.4 (Christian Ehrhardt <christian.ehrhardt@canonical.com> Tue, 22 Nov 2022 15:59:28 +0100), qemu version: 6.2.0Debian 1:6.2+dfsg-2ubuntu6.6, kernel: 5.19.0-35-generic, hostname: c1c2
-LC_ALL=C \
-PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin \
-HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e \
-XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.local/share \
-XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.cache \
-XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-4-instance-0000000e/.config \
-/usr/bin/qemu-system-x86_64 \
--name guest=instance-0000000e,debug-threads=on \
--S \
--object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-4-instance-0000000e/master-key.aes"}' \
--machine pc-i440fx-6.2,usb=off,dump-guest-core=off,memory-backend=pc.ram \
--accel kvm \
--cpu Cooperlake,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,sha-ni=on,umip=on,waitpkg=on,gfni=on,vaes=on,vpclmulqdq=on,rdpid=on,movdiri=on,movdir64b=on,fsrm=on,md-clear=on,avx-vnni=on,xsaves=on,ibpb=on,ibrs=on,amd-stibp=on,amd-ssbd=on,hle=off,rtm=off,avx512f=off,avx512dq=off,avx512cd=off,avx512bw=off,avx512vl=off,avx512vnni=off,avx512-bf16=off,taa-no=off \
--m 64 \
--object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":67108864}' \
--overcommit mem-lock=off \
--smp 1,sockets=1,dies=1,cores=1,threads=1 \
--uuid ff91d2dc-69a1-43ef-abde-c9e4e9a0305b \
--smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=25.1.0,serial=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,uuid=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,family=Virtual Machine' \
--no-user-config \
--nodefaults \
--chardev socket,id=charmonitor,fd=33,server=on,wait=off \
--mon chardev=charmonitor,id=monitor,mode=control \
--rtc base=utc,driftfix=slew \
--global kvm-pit.lost_tick_policy=delay \
--no-hpet \
--no-shutdown \
--boot strict=on \
--device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
--blockdev '{"driver":"file","filename":"/var/lib/nova/instances/_base/8b58db82a488248e7c5e769599954adaa47a5314","node-name":"libvirt-2-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
--blockdev '{"node-name":"libvirt-2-format","read-only":true,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-2-storage"}' \
--blockdev
'{"driver":"file","filename":"/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \ --blockdev '{"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":"libvirt-2-format"}' \ --device virtio-blk-pci,bus=pci.0,addr=0x3,drive=libvirt-1-format,id=virtio-disk0,bootindex=1,write-cache=on \ --add-fd set=1,fd=34 \ --chardev pty,id=charserial0,logfile=/dev/fdset/1,logappend=on \ --device isa-serial,chardev=charserial0,id=serial0 \ --device usb-tablet,id=input0,bus=usb.0,port=1 \ --audiodev '{"id":"audio1","driver":"none"}' \ --vnc -0.0.0.0:0 -,audiodev=audio1 \ --device virtio-vga,id=video0,max_outputs=1,bus=pci.0,addr=0x2 \ --device vfio-pci,host=0000:01:00.5,id=hostdev0,bus=pci.0,addr=0x4 \ --device vfio-pci,host=0000:01:00.6,id=hostdev1,bus=pci.0,addr=0x5 \ --device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 \ --object '{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}' \ --device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.0,addr=0x7 \ --device vmcoreinfo \ --sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ --msg timestamp=on -char device redirected to /dev/pts/3 (label charserial0) -2023-03-23T08:00:53.728550Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument -kvm_set_phys_mem: error registering slot: Invalid argument -2023-03-23 08:00:54.201+0000: shutting down, reason=crashed -2023-03-23 08:54:43.468+0000: starting up libvirt version: 8.0.0, package: 1ubuntu7.4 (Christian Ehrhardt < -christian.ehrhardt@canonical.com -> Tue, 22 Nov 2022 15:59:28 +0100), qemu version: 6.2.0Debian 1:6.2+dfsg-2ubuntu6.6, kernel: 5.19.0-35-generic, hostname: c1c2 -LC_ALL=C \ -PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin \ -HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e \ -XDG_DATA_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.local/share \ -XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.cache \ -XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain-5-instance-0000000e/.config \ -/usr/bin/qemu-system-x86_64 \ --name guest=instance-0000000e,debug-threads=on \ --S \ --object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-5-instance-0000000e/master-key.aes"}' \ --machine pc-i440fx-6.2,usb=off,dump-guest-core=off,memory-backend=pc.ram \ --accel kvm \ --cpu Cooperlake,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,sha-ni=on,umip=on,waitpkg=on,gfni=on,vaes=on,vpclmulqdq=on,rdpid=on,movdiri=on,movdir64b=on,fsrm=on,md-clear=on,avx-vnni=on,xsaves=on,ibpb=on,ibrs=on,amd-stibp=on,amd-ssbd=on,hle=off,rtm=off,avx512f=off,avx512dq=off,avx512cd=off,avx512bw=off,avx512vl=off,avx512vnni=off,avx512-bf16=off,taa-no=off \ --m 64 \ --object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":67108864}' \ --overcommit mem-lock=off \ --smp 1,sockets=1,dies=1,cores=1,threads=1 \ --uuid ff91d2dc-69a1-43ef-abde-c9e4e9a0305b \ --smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=25.1.0,serial=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,uuid=ff91d2dc-69a1-43ef-abde-c9e4e9a0305b,family=Virtual Machine' \ --no-user-config \ --nodefaults \ --chardev socket,id=charmonitor,fd=33,server=on,wait=off \ --mon chardev=charmonitor,id=monitor,mode=control \ 
--rtc base=utc,driftfix=slew \
--global kvm-pit.lost_tick_policy=delay \
--no-hpet \
--no-shutdown \
--boot strict=on \
--device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
--blockdev '{"driver":"file","filename":"/var/lib/nova/instances/_base/8b58db82a488248e7c5e769599954adaa47a5314","node-name":"libvirt-2-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
--blockdev '{"node-name":"libvirt-2-format","read-only":true,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-2-storage"}' \
--blockdev '{"driver":"file","filename":"/var/lib/nova/instances/ff91d2dc-69a1-43ef-abde-c9e4e9a0305b/disk","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
--blockdev '{"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":"libvirt-2-format"}' \
--device virtio-blk-pci,bus=pci.0,addr=0x3,drive=libvirt-1-format,id=virtio-disk0,bootindex=1,write-cache=on \
--add-fd set=1,fd=34 \
--chardev pty,id=charserial0,logfile=/dev/fdset/1,logappend=on \
--device isa-serial,chardev=charserial0,id=serial0 \
--device usb-tablet,id=input0,bus=usb.0,port=1 \
--audiodev '{"id":"audio1","driver":"none"}' \
--vnc 0.0.0.0:0,audiodev=audio1 \
--device virtio-vga,id=video0,max_outputs=1,bus=pci.0,addr=0x2 \
--device vfio-pci,host=0000:01:00.5,id=hostdev0,bus=pci.0,addr=0x4 \
--device vfio-pci,host=0000:01:00.6,id=hostdev1,bus=pci.0,addr=0x5 \
--device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 \
--object '{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}' \
--device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.0,addr=0x7 \
--device vmcoreinfo \
--sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
--msg timestamp=on
-char device redirected to /dev/pts/3 (label charserial0)
-2023-03-23T08:54:44.755039Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument
-kvm_set_phys_mem: error registering slot: Invalid argument
-2023-03-23 08:54:45.230+0000: shutting down, reason=crashed
-----
-Simon Jones
-Simon Jones <batmanustc@gmail.com> wrote on Thu, 23 Mar 2023 at 05:49:
-[follow-up message quoted in full; trimmed here as a verbatim duplicate]
-Simon Jones <batmanustc@gmail.com> wrote on Tue, 21 Mar 2023 at 08:40:
-[original bug report quoted in full; trimmed here as a verbatim duplicate]
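[Editor's note: a minimal userspace sketch of the ioctl that fails in the logs above; this is not code from the thread, and all values are illustrative. On kernels of this vintage, one way KVM_SET_USER_MEMORY_REGION fails with EINVAL is a guest physical range that lies above what the host MMU can map, which matches the very high GPA (0xfffffffffe000000) QEMU reports.]

```
/* Sketch: exercise KVM_SET_USER_MEMORY_REGION with the GPA from the log.
 * Error handling is trimmed for brevity; assumes /dev/kvm is usable. */
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
    int kvm = open("/dev/kvm", O_RDWR);
    int vm = ioctl(kvm, KVM_CREATE_VM, 0);
    void *host = mmap(NULL, 0x2000, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    struct kvm_userspace_memory_region region = {
        .slot = 4,
        .guest_phys_addr = 0xfffffffffe000000ULL, /* GPA from the failing log line */
        .memory_size = 0x2000,
        .userspace_addr = (unsigned long)host,
    };

    /* Prints "Invalid argument" when KVM rejects the slot, as in the report. */
    if (ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region) < 0)
        perror("KVM_SET_USER_MEMORY_REGION");
    return 0;
}
```

Running a sketch like this on the affected host would show whether plain KVM, independent of QEMU and libvirt, refuses a memslot at that address.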
diff --git a/classification_output/03/KVM/26430026 b/classification_output/03/KVM/26430026
deleted file mode 100644
index 5ecea6ffe..000000000
--- a/classification_output/03/KVM/26430026
+++ /dev/null
@@ -1,168 +0,0 @@
-KVM: 0.919
-mistranslation: 0.915
-semantic: 0.904
-instruction: 0.888
-boot: 0.841
-other: 0.813
-network: 0.758
-
-[BUG] cxl,i386: e820 mappings may not be correct for cxl
-
-Context included below from prior discussion
- - `cxl create-region` would fail on inability to allocate memory
- - traced this down to the memory region being marked RESERVED
- - E820 map marks the CXL fixed memory window as RESERVED
-
-
-Re: x86 errors, I found that the region worked with this patch. (I also
-added the SRAT patches that Davidlohr posted, but I do not think they are
-relevant.)
-
-I don't think this is correct, and setting this to E820_RAM causes the
-system to fail to boot at all, but with this change `cxl create-region`
-succeeds, which suggests our e820 mappings in the i386 machine are
-incorrect.
-
-Anyone who can help or has an idea as to what e820 should actually be
-doing with this region, or whether this is correct and something else is
-failing, please help!
-
-
-diff --git a/hw/i386/pc.c b/hw/i386/pc.c
-index 566accf7e6..a5e688a742 100644
---- a/hw/i386/pc.c
-+++ b/hw/i386/pc.c
-@@ -1077,7 +1077,7 @@ void pc_memory_init(PCMachineState *pcms,
-         memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
-                               "cxl-fixed-memory-region", fw->size);
-         memory_region_add_subregion(system_memory, fw->base, &fw->mr);
--        e820_add_entry(fw->base, fw->size, E820_RESERVED);
-+        e820_add_entry(fw->base, fw->size, E820_NVS);
-         cxl_fmw_base += fw->size;
-         cxl_resv_end = cxl_fmw_base;
-     }
-
-
-On Mon, Oct 10, 2022 at 05:32:42PM +0100, Jonathan Cameron wrote:
-> > but I'm not sure what to do with this info. We have some proof
-> > that real hardware works with this no problem, and the only difference
-> > is that the EFI/bios/firmware is setting the memory regions as `usable`
-> > or `soft reserved`, which would imply that EDK2 is the blocker here
-> > regardless of the OS driver status.
-> >
-> > But I'd seen elsewhere you had gotten some of this working, and I'm
-> > failing to get anything working at the moment. If you have any input I
-> > would greatly appreciate the help.
-> -> > -> -> > QEMU config: -> -> > -> -> > /opt/qemu-cxl2/bin/qemu-system-x86_64 \ -> -> > -drive -> -> > file=/var/lib/libvirt/images/cxl.qcow2,format=qcow2,index=0,media=d\ -> -> > -m 2G,slots=4,maxmem=4G \ -> -> > -smp 4 \ -> -> > -machine type=q35,accel=kvm,cxl=on \ -> -> > -enable-kvm \ -> -> > -nographic \ -> -> > -device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 \ -> -> > -device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 \ -> -> > -object memory-backend-file,id=cxl-mem0,mem-path=/tmp/cxl-mem0,size=256M \ -> -> > -object memory-backend-file,id=lsa0,mem-path=/tmp/cxl-lsa0,size=256M \ -> -> > -device cxl-type3,bus=rp0,pmem=true,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 -> -> > \ -> -> > -M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=256M -> -> > -> -> > I'd seen on the lists that you had seen issues with single-rp setups, -> -> > but no combination of configuration I've tried (including all the ones -> -> > in the docs and tests) lead to a successful region creation with -> -> > `cxl create-region` -> -> -> -> Hmm. Let me have a play. I've not run x86 tests for a while so -> -> perhaps something is missing there. -> -> -> -> I'm carrying a patch to override check_last_peer() in -> -> cxl_port_setup_targets() as that is wrong for some combinations, -> -> but that doesn't look like it's related to what you are seeing. -> -> -I'm not sure if it's relevant, but turned out I'd forgotten I'm carrying 3 -> -patches that aren't upstream (and one is a horrible hack). -> -> -Hack: -https://lore.kernel.org/linux-cxl/20220819094655.000005ed@huawei.com/ -> -Shouldn't affect a simple case like this... -> -> -https://lore.kernel.org/linux-cxl/20220819093133.00006c22@huawei.com/T/#t -> -(Dan's version) -> -> -https://lore.kernel.org/linux-cxl/20220815154044.24733-1-Jonathan.Cameron@huawei.com/T/#t -> -> -For writes to work you will currently need two rps (nothing on the second is -> -fine) -> -as we still haven't resolved if the kernel should support an HDM decoder on -> -a host bridge with one port. I think it should (Spec allows it), others -> -unconvinced. -> -> -Note I haven't shifted over to x86 yet so may still be something different -> -from -> -arm64. -> -> -Jonathan -> -> - diff --git a/classification_output/03/KVM/33802194 b/classification_output/03/KVM/33802194 deleted file mode 100644 index 5a0c1895c..000000000 --- a/classification_output/03/KVM/33802194 +++ /dev/null @@ -1,4942 +0,0 @@ -KVM: 0.725 -instruction: 0.693 -mistranslation: 0.687 -semantic: 0.656 -network: 0.644 -other: 0.637 -boot: 0.631 - -[BUG] cxl can not create region - -Hi list - -I want to test cxl functions in arm64, and found some problems I can't -figure out. - -My test environment: - -1. build latest bios from -https://github.com/tianocore/edk2.git -master -branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) -2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git -master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm -support patch: -https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ -3. build Linux kernel from -https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git -preview -branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) -4. 
build latest ndctl tools from -https://github.com/pmem/ndctl -create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) - -And my qemu test commands: -sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ - -cpu max -smp 8 -nographic -no-reboot \ - -kernel $KERNEL -bios $BIOS_BIN \ - -drive if=none,file=$ROOTFS,format=qcow2,id=hd \ - -device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 -nokaslr dyndbg="module cxl* +p"' \ - -object memory-backend-ram,size=4G,id=mem0 \ - -numa node,nodeid=0,cpus=0-7,memdev=mem0 \ - -net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ - -object -memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M -\ - -object -memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M -\ - -object -memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M -\ - -object -memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M -\ - -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ - -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ - -device cxl-upstream,bus=root_port0,id=us0 \ - -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ - -device -cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ - -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ - -device -cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ - -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ - -device -cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ - -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ - -device -cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ - -M -cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k - -And I have got two problems. -1. When I want to create x1 region with command: "cxl create-region -d -decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer -reference. 
Crash log: - -[ 534.697324] cxl_region region0: config state: 0 -[ 534.697346] cxl_region region0: probe: -6 -[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 -[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -for mem0:decoder3.0 @ 0 -[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 -[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = -0000:0e:00.0 for mem0:decoder3.0 @ 0 -[ 534.699405] Unable to handle kernel NULL pointer dereference at -virtual address 0000000000000000 -[ 534.701474] Mem abort info: -[ 534.701994] ESR = 0x0000000086000004 -[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits -[ 534.703616] SET = 0, FnV = 0 -[ 534.704174] EA = 0, S1PTW = 0 -[ 534.704803] FSC = 0x04: level 0 translation fault -[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 -[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 -[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP -[ 534.710301] Modules linked in: -[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted -5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 -[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 -[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -[ 534.719190] pc : 0x0 -[ 534.719928] lr : commit_store+0x118/0x2cc -[ 534.721007] sp : ffff80000aec3c30 -[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: ffff0000c0c06b30 -[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: ffff0000c0a29400 -[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: ffff0000c0c06800 -[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: 0000000000000000 -[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffd41fe838 -[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 -[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 -[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff0000c0906e80 -[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff80000aec3bf0 -[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c155a000 -[ 534.738878] Call trace: -[ 534.739368] 0x0 -[ 534.739713] dev_attr_store+0x1c/0x30 -[ 534.740186] sysfs_kf_write+0x48/0x58 -[ 534.740961] kernfs_fop_write_iter+0x128/0x184 -[ 534.741872] new_sync_write+0xdc/0x158 -[ 534.742706] vfs_write+0x1ac/0x2a8 -[ 534.743440] ksys_write+0x68/0xf0 -[ 534.744328] __arm64_sys_write+0x1c/0x28 -[ 534.745180] invoke_syscall+0x44/0xf0 -[ 534.745989] el0_svc_common+0x4c/0xfc -[ 534.746661] do_el0_svc+0x60/0xa8 -[ 534.747378] el0_svc+0x2c/0x78 -[ 534.748066] el0t_64_sync_handler+0xb8/0x12c -[ 534.748919] el0t_64_sync+0x18c/0x190 -[ 534.749629] Code: bad PC value -[ 534.750169] ---[ end trace 0000000000000000 ]--- - -2. When I want to create x4 region with command: "cxl create-region -d -decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: - -cxl region: create_region: region0: failed to set target3 to mem3 -cxl region: cmd_create_region: created 0 regions - -And kernel log as below: -[ 60.536663] cxl_region region0: config state: 0 -[ 60.536675] cxl_region region0: probe: -6 -[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 -[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: -mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 -[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 -[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 -[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: -mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 -[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 -[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 -[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: -mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 -[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 -[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 -[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -for mem0:decoder3.0 @ 0 -[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 -[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = -0000:0e:00.0 for mem0:decoder3.0 @ 0 -[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at 1 - -I have tried to write sysfs node manually, got same errors. - -Hope I can get some helps here. - -Bob - -On Fri, 5 Aug 2022 10:20:23 +0800 -Bobo WL <lmw.bobo@gmail.com> wrote: - -> -Hi list -> -> -I want to test cxl functions in arm64, and found some problems I can't -> -figure out. -Hi Bob, - -Glad to see people testing this code. - -> -> -My test environment: -> -> -1. build latest bios from -https://github.com/tianocore/edk2.git -master -> -branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) -> -2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git -> -master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm -> -support patch: -> -https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ -> -3. build Linux kernel from -> -https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git -preview -> -branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) -> -4. 
build latest ndctl tools from -https://github.com/pmem/ndctl -> -create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) -> -> -And my qemu test commands: -> -sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ -> --cpu max -smp 8 -nographic -no-reboot \ -> --kernel $KERNEL -bios $BIOS_BIN \ -> --drive if=none,file=$ROOTFS,format=qcow2,id=hd \ -> --device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 -> -nokaslr dyndbg="module cxl* +p"' \ -> --object memory-backend-ram,size=4G,id=mem0 \ -> --numa node,nodeid=0,cpus=0-7,memdev=mem0 \ -> --net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ -> --object -> -memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M -> -\ -> --device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -> --device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ -Probably not related to your problem, but there is a disconnect in QEMU / -kernel assumptionsaround the presence of an HDM decoder when a HB only -has a single root port. Spec allows it to be provided or not as an -implementation choice. -Kernel assumes it isn't provide. Qemu assumes it is. - -The temporary solution is to throw in a second root port on the HB and not -connect anything to it. Longer term I may special case this so that the -particular -decoder defaults to pass through settings in QEMU if there is only one root -port. - -> --device cxl-upstream,bus=root_port0,id=us0 \ -> --device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ -> --device -> -cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ -> --device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ -> --device -> -cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ -> --device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ -> --device -> -cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ -> --device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ -> --device -> -cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ -> --M -> -cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k -> -> -And I have got two problems. -> -1. When I want to create x1 region with command: "cxl create-region -d -> -decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer -> -reference. Crash log: -> -> -[ 534.697324] cxl_region region0: config state: 0 -> -[ 534.697346] cxl_region region0: probe: -6 -Seems odd this is up here. But maybe fine. 
- -> -[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 -> -[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: -> -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -> -[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -> -[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -> -[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -> -[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -> -for mem0:decoder3.0 @ 0 -> -[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 -> -[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = -> -0000:0e:00.0 for mem0:decoder3.0 @ 0 -> -[ 534.699405] Unable to handle kernel NULL pointer dereference at -> -virtual address 0000000000000000 -> -[ 534.701474] Mem abort info: -> -[ 534.701994] ESR = 0x0000000086000004 -> -[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits -> -[ 534.703616] SET = 0, FnV = 0 -> -[ 534.704174] EA = 0, S1PTW = 0 -> -[ 534.704803] FSC = 0x04: level 0 translation fault -> -[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 -> -[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 -> -[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP -> -[ 534.710301] Modules linked in: -> -[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted -> -5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 -> -[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 -> -[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -> -[ 534.719190] pc : 0x0 -> -[ 534.719928] lr : commit_store+0x118/0x2cc -> -[ 534.721007] sp : ffff80000aec3c30 -> -[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: -> -ffff0000c0c06b30 -> -[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: -> -ffff0000c0a29400 -> -[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: -> -ffff0000c0c06800 -> -[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: -> -0000000000000000 -> -[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: -> -0000ffffd41fe838 -> -[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: -> -0000000000000000 -> -[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : -> -0000000000000000 -> -[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : -> -ffff0000c0906e80 -> -[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : -> -ffff80000aec3bf0 -> -[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : -> -ffff0000c155a000 -> -[ 534.738878] Call trace: -> -[ 534.739368] 0x0 -> -[ 534.739713] dev_attr_store+0x1c/0x30 -> -[ 534.740186] sysfs_kf_write+0x48/0x58 -> -[ 534.740961] kernfs_fop_write_iter+0x128/0x184 -> -[ 534.741872] new_sync_write+0xdc/0x158 -> -[ 534.742706] vfs_write+0x1ac/0x2a8 -> -[ 534.743440] ksys_write+0x68/0xf0 -> -[ 534.744328] __arm64_sys_write+0x1c/0x28 -> -[ 534.745180] invoke_syscall+0x44/0xf0 -> -[ 534.745989] el0_svc_common+0x4c/0xfc -> -[ 534.746661] do_el0_svc+0x60/0xa8 -> -[ 534.747378] el0_svc+0x2c/0x78 -> -[ 534.748066] el0t_64_sync_handler+0xb8/0x12c -> -[ 534.748919] el0t_64_sync+0x18c/0x190 -> -[ 534.749629] Code: bad PC value -> -[ 534.750169] ---[ end trace 0000000000000000 ]--- -> -> -2. When I want to create x4 region with command: "cxl create-region -d -> -decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: -> -> -cxl region: create_region: region0: failed to set target3 to mem3 -> -cxl region: cmd_create_region: created 0 regions -> -> -And kernel log as below: -> -[ 60.536663] cxl_region region0: config state: 0 -> -[ 60.536675] cxl_region region0: probe: -6 -> -[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 -> -[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: -> -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -> -[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -> -[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: -> -mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 -> -[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 -> -[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: -> -mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 -> -[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 -> -[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: -> -mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 -> -[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 -> -[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -> -[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -> -for mem0:decoder3.0 @ 0 -> -[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 -This looks like off by 1 that should be fixed in the below mentioned -cxl/pending branch. That ig should be 256. Note the fix was -for a test case with a fat HB and no switch, but certainly looks -like this is the same issue. - -> -[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = -> -0000:0e:00.0 for mem0:decoder3.0 @ 0 -> -[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at -> -1 -> -> -I have tried to write sysfs node manually, got same errors. -When stepping through by hand, which sysfs write triggers the crash above? - -Not sure it's related, but I've just sent out a fix to the -target register handling in QEMU. -20220808122051.14822-1-Jonathan.Cameron@huawei.com -/T/#m47ff985412ce44559e6b04d677c302f8cd371330">https://lore.kernel.org/linux-cxl/ -20220808122051.14822-1-Jonathan.Cameron@huawei.com -/T/#m47ff985412ce44559e6b04d677c302f8cd371330 -I did have one instance last week of triggering what looked to be a race -condition but -the stack trace doesn't looks related to what you've hit. - -It will probably be a few days before I have time to take a look at replicating -what you have seen. - -If you have time, try using the kernel.org cxl/pending branch as there are -a few additional fixes on there since you sent this email. Optimistic to hope -this is covered by one of those, but at least it will mean we are trying to -replicate -on same branch. - -Jonathan - - -> -> -Hope I can get some helps here. 
> [...]

Bob

Hi Jonathan

Thanks for your reply!

On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> Probably not related to your problem, but there is a disconnect in QEMU /
> kernel assumptions around the presence of an HDM decoder when a HB only
> has a single root port. The spec allows it to be provided or not as an
> implementation choice.
> The kernel assumes it isn't provided; QEMU assumes it is.
>
> The temporary solution is to throw in a second root port on the HB and not
> connect anything to it. Longer term I may special case this so that the
> particular decoder defaults to pass-through settings in QEMU if there is
> only one root port.

You are right! After adding an extra HB in qemu, I can create a x1
region successfully.
But I have some errors in Nvdimm:

[   74.925838] Unknown online node for memory at 0x10000000000, assuming node 0
[   74.925846] Unknown target node for memory at 0x10000000000, assuming node 0
[   74.927470] nd_region region0: nmem0: is disabled, failing probe

And the x4 region still fails with the same errors; using the latest
cxl/preview branch doesn't work.
I have picked the "Two CXL emulation fixes" patches into qemu, still not
working.

Bob

On Tue, 9 Aug 2022 21:07:06 +0800
Bobo WL <lmw.bobo@gmail.com> wrote:

> [Bobo's message above, trimmed]

Ah. I've seen this one, but not chased it down yet. It was on my todo list
to chase down. Once I reach this state I can verify the HDM decode is
correct, which is what I've been using to test (which wasn't true until
earlier this week). I'm currently testing via devmem, more for historical
reasons than because it makes that much sense anymore.

On Tue, 9 Aug 2022 17:08:25 +0100
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> [earlier thread trimmed]

*embarrassed cough*. We haven't fully hooked the LSA up in qemu yet.
I'd forgotten that was still on the todo list. I don't think it will
be particularly hard to do and will take a look in the next few days.

Very, very indirectly this error is causing a driver probe fail that means
we hit a code path that has a rather odd looking check on NDD_LABELING.
We should not have gotten near that path though - hence the problem is
actually that when we call cxl_pmem_get_config_data() it returns an error,
because we haven't fully connected up the command in QEMU.

Jonathan
On Thu, 11 Aug 2022 18:08:57 +0100
Jonathan Cameron via <qemu-devel@nongnu.org> wrote:

> [earlier thread trimmed]

So at least one bug in QEMU. We were not supporting variable length payloads
on mailbox inputs (but were on outputs). That hasn't mattered until we get
to LSA writes. We just need to relax the condition on the supplied length.

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index c352a935c4..fdda9529fe 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
     cxl_cmd = &cxl_cmd_set[set][cmd];
     h = cxl_cmd->handler;
     if (h) {
-        if (len == cxl_cmd->in) {
+        if (len == cxl_cmd->in || !cxl_cmd->in) {
             cxl_cmd->payload = cxl_dstate->mbox_reg_state +
                 A_CXL_DEV_CMD_PAYLOAD;
             ret = (*h)(cxl_cmd, cxl_dstate, &len);

This lets the nvdimm/region probe fine, but I'm getting some issues with
namespace capacity, so I'll look at what is causing that next.
Unfortunately I'm not that familiar with the driver/nvdimm side of things,
so it'll take a while to figure out what kicks off what!

Jonathan

Jonathan Cameron wrote:
> [earlier thread trimmed]

The whirlwind tour is that 'struct nd_region' instances that represent a
persistent memory address range are composed of one or more mappings of
'struct nvdimm' objects. The nvdimm object is driven by the dimm driver
in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking
the dimm (if locked) and interrogating the label area to look for
namespace labels.

The label command calls are routed to the '->ndctl()' callback that was
registered when the CXL nvdimm_bus_descriptor was created. That callback
handles both 'bus' scope calls, currently none for CXL, and per-nvdimm
calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands
to CXL commands.

The 'struct nvdimm' objects that the CXL side registers have the
NDD_LABELING flag set, which means that namespaces need to be explicitly
created / provisioned from region capacity. Otherwise, if
drivers/nvdimm/dimm.c does not find a namespace-label-index block, then
the region reverts to label-less mode and a default namespace equal to
the size of the region is instantiated.

If you are seeing small mismatches in namespace capacity then it may
just be the fact that by default 'ndctl create-namespace' results in an
'fsdax' mode namespace, which just means that it is a block device where
1.5% of the capacity is reserved for 'struct page' metadata. You should
be able to see namespace capacity == region capacity by doing "ndctl
create-namespace -m raw", which disables DAX operation.

Hope that helps.
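For readers following along, a rough sketch of the registration and dispatch
relationship described above, loosely modeled on the kernel's
drivers/cxl/pmem.c of this era. The "_sketch" names are illustrative, and
the exact signatures may have shifted since, so treat this as a guide to
the shape of the code rather than verbatim kernel source:

/*
 * Sketch: register an nvdimm with NDD_LABELING plus the label commands,
 * and route generic LIBNVDIMM commands through the bus descriptor's
 * ->ndctl() callback to CXL mailbox commands.
 */
static int cxl_nvdimm_probe_sketch(struct cxl_nvdimm *cxl_nvd,
                                   struct nvdimm_bus *nvdimm_bus)
{
        unsigned long flags = 0, cmd_mask = 0;
        struct nvdimm *nvdimm;

        /* NDD_LABELING: namespaces must be provisioned from labels,
         * rather than defaulting to one label-less namespace. */
        set_bit(NDD_LABELING, &flags);
        set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
        set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
        set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);

        nvdimm = nvdimm_create(nvdimm_bus, cxl_nvd, NULL, flags,
                               cmd_mask, 0, NULL);
        return nvdimm ? 0 : -ENOMEM;
}

/* The ->ndctl() callback ends up here; bus-scope calls (nvdimm == NULL)
 * are rejected since CXL has none, and per-nvdimm label commands are
 * handed to cxl_pmem_nvdimm_ctl() for translation to Get/Set LSA. */
static int cxl_pmem_ctl_sketch(struct nvdimm_bus_descriptor *nd_desc,
                               struct nvdimm *nvdimm, unsigned int cmd,
                               void *buf, unsigned int buf_len, int *cmd_rc)
{
        if (!nvdimm)
                return -ENOTTY;
        return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}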
On Fri, 12 Aug 2022 09:03:02 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> [earlier thread trimmed]
>
> If you are seeing small mismatches in namespace capacity then it may
> just be the fact that by default 'ndctl create-namespace' results in an
> 'fsdax' mode namespace [...] You should be able to see namespace
> capacity == region capacity by doing "ndctl create-namespace -m raw".

Currently ndctl create-namespace crashes qemu ;)
Which isn't ideal!

> Hope that helps.

Got me looking at the right code. Thanks!

Jonathan
On Fri, 12 Aug 2022 17:15:09 +0100
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> [earlier thread trimmed]
>
> Currently ndctl create-namespace crashes qemu ;)
> Which isn't ideal!

Found a cause for this one. The mailbox payload may be as small as 256
bytes. We have code in the kernel sanity checking that the output payload
fits in the mailbox, but nothing on the input payload. The symptom is that
we write just off the end, whatever size the payload is. Note that doing
this shouldn't crash qemu - so I need to fix a range check somewhere.

I think this is because cxl_pmem_get_config_size() returns the mailbox
payload size as being the available LSA transfer size, forgetting to remove
the size of the headers on the set_lsa side of things.
https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110

I've hacked the max_payload to be -8.

Now we still don't succeed in creating the namespace, but the bonus is it
doesn't crash any more.

Jonathan
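To make the "-8" concrete: the Set LSA input payload carries an 8-byte
header (a 4-byte offset plus 4 reserved bytes) ahead of the data, so the
largest single transfer is the mailbox payload minus that header. A sketch
of what the accounting looks like, using field and struct names assumed
from the kernel's drivers/cxl/pmem.c of this period; illustrative, not the
committed fix:

/* Sketch: report max_xfer as payload size minus the Set LSA header,
 * since the written data shares the mailbox payload with that header. */
static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
                                    struct nd_cmd_get_config_size *cmd,
                                    unsigned int buf_len)
{
        if (sizeof(*cmd) > buf_len)
                return -EINVAL;

        *cmd = (struct nd_cmd_get_config_size) {
                .config_size = cxlds->lsa_size,
                .max_xfer = cxlds->payload_size -
                            sizeof(struct cxl_mbox_set_lsa),
        };

        return 0;
}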
On Mon, 15 Aug 2022 15:18:09 +0100
Jonathan Cameron via <qemu-devel@nongnu.org> wrote:

> [earlier thread trimmed]
>
> Found a cause for this one. The mailbox payload may be as small as 256
> bytes. We have code in the kernel sanity checking that the output payload
> fits in the mailbox, but nothing on the input payload. [...]

In the interests of defensive / correct handling from QEMU I took a
look into why it was crashing. Turns out that providing a NULL write
callback for the memory device region (that the above overlarge write was
spilling into) isn't a safe thing to do. It needs a stub. Oops.

On the plus side, we might never have noticed this was going wrong without
the crash *silver lining in every cloud*

Fix to follow...

Jonathan

On Mon, 15 Aug 2022 at 15:55, Jonathan Cameron via <qemu-arm@nongnu.org> wrote:
> In the interests of defensive / correct handling from QEMU I took a
> look into why it was crashing. Turns out that providing a NULL write
> callback for the memory device region (that the above overlarge write
> was spilling into) isn't a safe thing to do. It needs a stub. Oops.

Yeah. We've talked before about adding an assert so that that kind of
"missing function" bug is caught at device creation rather than only
if the guest tries to access the device, but we never quite got around
to it...

-- PMM
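For reference, the shape of the problem: QEMU's memory API dispatches guest
accesses through a MemoryRegionOps table, and a missing callback is called
unconditionally. A minimal sketch of a region with an explicit no-op write
stub; the function and ops names here are illustrative, not the actual
patch:

#include "qemu/osdep.h"
#include "exec/memory.h"

static uint64_t cxl_dev_read(void *opaque, hwaddr addr, unsigned size)
{
    return 0;
}

/* The stub being described: guest writes land here instead of
 * dereferencing a NULL .write pointer, and are silently ignored. */
static void cxl_dev_write(void *opaque, hwaddr addr, uint64_t val,
                          unsigned size)
{
}

static const MemoryRegionOps cxl_dev_ops = {
    .read = cxl_dev_read,
    .write = cxl_dev_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};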
On Fri, 12 Aug 2022 16:44:03 +0100
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> [earlier thread trimmed]
>
> -        if (len == cxl_cmd->in) {
> +        if (len == cxl_cmd->in || !cxl_cmd->in) {

The fix is wrong, as we use ~0 as the placeholder for variable payload,
not 0.

With that fixed we hit new fun paths - after some errors we get the
worrying trace below. I'm not totally sure, but it looks like a failure on
an error cleanup. I'll chase down the error source, but even then this is
probably triggerable by a hardware problem or similar. There are some bonus
prints in here from me chasing error paths, but it's otherwise just
cxl/next + the fix I posted earlier today.

[   69.919877] nd_bus ndbus0: START: nd_region.probe(region0)
[   69.920108] nd_region_probe
[   69.920623] ------------[ cut here ]------------
[   69.920675] refcount_t: addition on 0; use-after-free.
[   69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x144
[   69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi cxl_core
[   69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399
[   69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
[   69.931482] Workqueue: events_unbound async_run_entry_fn
[   69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   69.934023] pc : refcount_warn_saturate+0xa0/0x144
[   69.935161] lr : refcount_warn_saturate+0xa0/0x144
[   69.936541] sp : ffff80000890b960
[   69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: 0000000000000000
[   69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: 0000000000000000
[   69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: ffff0000c5254800
[   69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: ffffffffffffffff
[   69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: 0000000000000000
[   69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: 657466612d657375
[   69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : ffffa54a8f63d288
[   69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : 00000000fffff31e
[   69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : ffff5ab66e5ef000
[   69.954752] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740
[   69.957098] Call trace:
[   69.957959]  refcount_warn_saturate+0xa0/0x144
[   69.958773]  get_ndd+0x5c/0x80
[   69.959294]  nd_region_register_namespaces+0xe4/0xe90
[   69.960253]  nd_region_probe+0x100/0x290
[   69.960796]  nvdimm_bus_probe+0xf4/0x1c0
[   69.962087]  really_probe+0x19c/0x3f0
[   69.962620]  __driver_probe_device+0x11c/0x190
[   69.963258]  driver_probe_device+0x44/0xf4
[   69.963773]  __device_attach_driver+0xa4/0x140
[   69.964471]  bus_for_each_drv+0x84/0xe0
[   69.965068]  __device_attach+0xb0/0x1f0
[   69.966101]  device_initial_probe+0x20/0x30
[   69.967142]  bus_probe_device+0xa4/0xb0
[   69.968104]  device_add+0x3e8/0x910
[   69.969111]  nd_async_device_register+0x24/0x74
[   69.969928]  async_run_entry_fn+0x40/0x150
[   69.970725]  process_one_work+0x1dc/0x450
[   69.971796]  worker_thread+0x154/0x450
[   69.972700]  kthread+0x118/0x120
[   69.974141]  ret_from_fork+0x10/0x20
[   69.975141] ---[ end trace 0000000000000000 ]---
[   70.117887] Into nd_namespace_pmem_set_resource()

> [remainder of quoted thread trimmed]
On Mon, 15 Aug 2022 18:04:44 +0100
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> [earlier thread trimmed]
>
> > -        if (len == cxl_cmd->in) {
> > +        if (len == cxl_cmd->in || !cxl_cmd->in) {
>
> The fix is wrong, as we use ~0 as the placeholder for variable payload,
> not 0.

The cause of the error is a failure in GET_LSA.
Reason: the payload length is wrong in QEMU, but this was hidden previously
by my wrong fix here. It is probably still a good idea to inject an error
in GET_LSA and chase down the refcount issue.

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index fdda9529fe..e8565fbd6e 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -489,7 +489,7 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
         cmd_identify_memory_device, 0, 0 },
     [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
         cmd_ccls_get_partition_info, 0, 0 },
-    [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 },
+    [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
     [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
         ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
     [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
@@ -510,12 +510,13 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
     cxl_cmd = &cxl_cmd_set[set][cmd];
     h = cxl_cmd->handler;
     if (h) {
-        if (len == cxl_cmd->in || !cxl_cmd->in) {
+        if (len == cxl_cmd->in || cxl_cmd->in == ~0) {
             cxl_cmd->payload = cxl_dstate->mbox_reg_state +
                 A_CXL_DEV_CMD_PAYLOAD;

And woot, we get a namespace in the LSA :)

I'll post QEMU fixes in the next day or two. The kernel side now seems more
or less fine, albeit with a suspicious refcount underflow.

> With that fixed we hit new fun paths - after some errors we get the
> worrying trace. [refcount splat trimmed; see above]
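The 8 in the GET_LSA entry is the size of that command's fixed input
payload. A small sketch of why, following the CXL 2.0 Get LSA definition;
the struct name is illustrative, but the layout is the spec's:

#include <stdint.h>

/* CXL 2.0 Get LSA input payload: a 4-byte offset plus a 4-byte length,
 * hence the fixed input size of 8 in the command table above. */
struct get_lsa_in {
    uint32_t offset;   /* byte offset into the label storage area */
    uint32_t length;   /* number of bytes to read */
} __attribute__((packed));

_Static_assert(sizeof(struct get_lsa_in) == 8,
               "Get LSA input payload is 8 bytes");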
Jonathan Cameron wrote:
> [earlier thread trimmed]
>
> With that fixed we hit new fun paths - after some errors we get the
> worrying trace. I'm not totally sure, but it looks like a failure on an
> error cleanup. I'll chase down the error source, but even then this is
> probably triggerable by a hardware problem or similar.

One of the scenarios that I cannot rule out is nvdimm_probe() racing
nd_region_probe(), but given all the work it takes to create a region I
suspect all the nvdimm_probe() work to have completed...

It is at least one potentially wrong hypothesis that needs to be chased
down.

> [refcount splat trimmed; see above]
On Mon, 15 Aug 2022 15:55:15 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> [earlier thread trimmed]
>
> One of the scenarios that I cannot rule out is nvdimm_probe() racing
> nd_region_probe(), but given all the work it takes to create a region I
> suspect all the nvdimm_probe() work to have completed...
>
> It is at least one potentially wrong hypothesis that needs to be chased
> down.

Maybe there should be a special award for the non-intuitive
ndctl create-namespace command (it modifies an existing namespace and might
create a different empty one...). I'm sure there is some interesting
history behind that one :)

The upshot is I just threw a filesystem on fsdax and wrote some text files
on it to allow easy grepping. The right data ends up in the memory and a
plausible namespace description is stored in the LSA.

So to some degree at least it's 'working' on an 8-way direct-connected
set of emulated devices.

One snag is that serial number support isn't yet upstream in QEMU.
(I have had it in my tree for a while but not posted it yet because of
the QEMU feature freeze.)
https://gitlab.com/jic23/qemu/-/commit/144c783ea8a5fbe169f46ea1ba92940157f42733
That's needed for meaningful cookie generation. Otherwise you can build the
namespace once, but it won't work on the next probe, as the cookie is 0 and
you hit some error paths.

Maybe it would be sensible to add a sanity check and fail namespace
creation if the cookie is 0? (Silly side question, but is there a
theoretical risk of a serial number / other data combination leading to a
fletcher64() checksum that happens to be 0 - that would give a very odd
bug report!)

So to make it work the following is needed:

1) The kernel fix for the mailbox buffer overflow.
2) A QEMU fix for the size of the arguments for get_lsa.
3) A QEMU fix to allow variable size input arguments (for set_lsa).
4) The serial number patch above + command lines to qemu to set appropriate
   serial numbers.

I'll send out the QEMU fixes shortly and post the serial number patch,
though that almost certainly won't go in until the next QEMU development
cycle starts in a few weeks.

Next up, run through the same tests on some other topologies.

Jonathan
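On the side question: a Fletcher-64 of an all-zero buffer is 0, which is
exactly what happens when the serial numbers feeding the interleave-set
cookie are absent, and a zero result from non-zero input is possible in
principle. A self-contained sketch of the checksum, written from scratch
here in the style of the LIBNVDIMM label code's fletcher64(), for
illustration only:

#include <stdint.h>
#include <stddef.h>

/* Fletcher-64 over 32-bit words: two running sums, with the low sum
 * folded into the high one each step. All-zero input yields 0. */
static uint64_t fletcher64_sketch(const uint32_t *buf, size_t words)
{
    uint32_t lo32 = 0;
    uint64_t hi32 = 0;

    for (size_t i = 0; i < words; i++) {
        lo32 += buf[i];
        hi32 += lo32;
    }

    return hi32 << 32 | lo32;
}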
- -Next up, run through same tests on some other topologies. - -Jonathan - -> -> -> -> -> [ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) -> -> [ 69.920108] nd_region_probe -> -> [ 69.920623] ------------[ cut here ]------------ -> -> [ 69.920675] refcount_t: addition on 0; use-after-free. -> -> [ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 -> -> refcount_warn_saturate+0xa0/0x144 -> -> [ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port -> -> cxl_acpi cxl_core -> -> [ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ -> -> #399 -> -> [ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 -> -> 02/06/2015 -> -> [ 69.931482] Workqueue: events_unbound async_run_entry_fn -> -> [ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS -> -> BTYPE=--) -> -> [ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 -> -> [ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 -> -> [ 69.936541] sp : ffff80000890b960 -> -> [ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: -> -> 0000000000000000 -> -> [ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: -> -> 0000000000000000 -> -> [ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: -> -> ffff0000c5254800 -> -> [ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: -> -> ffffffffffffffff -> -> [ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: -> -> 0000000000000000 -> -> [ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: -> -> 657466612d657375 -> -> [ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : -> -> ffffa54a8f63d288 -> -> [ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : -> -> 00000000fffff31e -> -> [ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : -> -> ffff5ab66e5ef000 -> -> root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : -> -> 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 -> -> [ 69.957098] Call trace: -> -> [ 69.957959] refcount_warn_saturate+0xa0/0x144 -> -> [ 69.958773] get_ndd+0x5c/0x80 -> -> [ 69.959294] nd_region_register_namespaces+0xe4/0xe90 -> -> [ 69.960253] nd_region_probe+0x100/0x290 -> -> [ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 -> -> [ 69.962087] really_probe+0x19c/0x3f0 -> -> [ 69.962620] __driver_probe_device+0x11c/0x190 -> -> [ 69.963258] driver_probe_device+0x44/0xf4 -> -> [ 69.963773] __device_attach_driver+0xa4/0x140 -> -> [ 69.964471] bus_for_each_drv+0x84/0xe0 -> -> [ 69.965068] __device_attach+0xb0/0x1f0 -> -> [ 69.966101] device_initial_probe+0x20/0x30 -> -> [ 69.967142] bus_probe_device+0xa4/0xb0 -> -> [ 69.968104] device_add+0x3e8/0x910 -> -> [ 69.969111] nd_async_device_register+0x24/0x74 -> -> [ 69.969928] async_run_entry_fn+0x40/0x150 -> -> [ 69.970725] process_one_work+0x1dc/0x450 -> -> [ 69.971796] worker_thread+0x154/0x450 -> -> [ 69.972700] kthread+0x118/0x120 -> -> [ 69.974141] ret_from_fork+0x10/0x20 -> -> [ 69.975141] ---[ end trace 0000000000000000 ]--- -> -> [ 70.117887] Into nd_namespace_pmem_set_resource() - -Bobo WL wrote: -> -Hi list -> -> -I want to test cxl functions in arm64, and found some problems I can't -> -figure out. -> -> -My test environment: -> -> -1. build latest bios from -https://github.com/tianocore/edk2.git -master -> -branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) -> -2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git -> -master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). 
With cxl arm
->
-support patch:
->
-https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/
->
-3. build Linux kernel from
->
-https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git
-preview
->
-branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683)
->
-4. build latest ndctl tools from
-https://github.com/pmem/ndctl
->
-create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159)
->
->
-And my qemu test commands:
->
-sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \
->
--cpu max -smp 8 -nographic -no-reboot \
->
--kernel $KERNEL -bios $BIOS_BIN \
->
--drive if=none,file=$ROOTFS,format=qcow2,id=hd \
->
--device virtio-blk-pci,drive=hd -append 'root=/dev/vda1
->
-nokaslr dyndbg="module cxl* +p"' \
->
--object memory-backend-ram,size=4G,id=mem0 \
->
--numa node,nodeid=0,cpus=0-7,memdev=mem0 \
->
--net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \
->
--object
->
-memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M
->
-\
->
--object
->
-memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M
->
-\
->
--object
->
-memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M
->
-\
->
--object
->
-memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M
->
-\
->
--object
->
-memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M
->
-\
->
--object
->
-memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M
->
-\
->
--object
->
-memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M
->
-\
->
--object
->
-memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M
->
-\
->
--device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
->
--device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
->
--device cxl-upstream,bus=root_port0,id=us0 \
->
--device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
->
--device
->
-cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \
->
--device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
->
--device
->
-cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \
->
--device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
->
--device
->
-cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \
->
--device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
->
--device
->
-cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \
->
--M
->
-cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
->
->
-And I have got two problems.
->
-1. When I want to create x1 region with command: "cxl create-region -d
->
-decoder0.0 -w 1 -g 4096 mem0", kernel crashed with a null pointer
->
-dereference.
Crash log: -> -> -[ 534.697324] cxl_region region0: config state: 0 -> -[ 534.697346] cxl_region region0: probe: -6 -> -[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 -> -[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: -> -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -> -[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -> -[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -> -[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -> -[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -> -for mem0:decoder3.0 @ 0 -> -[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 -> -[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = -> -0000:0e:00.0 for mem0:decoder3.0 @ 0 -> -[ 534.699405] Unable to handle kernel NULL pointer dereference at -> -virtual address 0000000000000000 -> -[ 534.701474] Mem abort info: -> -[ 534.701994] ESR = 0x0000000086000004 -> -[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits -> -[ 534.703616] SET = 0, FnV = 0 -> -[ 534.704174] EA = 0, S1PTW = 0 -> -[ 534.704803] FSC = 0x04: level 0 translation fault -> -[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 -> -[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 -> -[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP -> -[ 534.710301] Modules linked in: -> -[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted -> -5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 -> -[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 -> -[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -> -[ 534.719190] pc : 0x0 -> -[ 534.719928] lr : commit_store+0x118/0x2cc -> -[ 534.721007] sp : ffff80000aec3c30 -> -[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: -> -ffff0000c0c06b30 -> -[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: -> -ffff0000c0a29400 -> -[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: -> -ffff0000c0c06800 -> -[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: -> -0000000000000000 -> -[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: -> -0000ffffd41fe838 -> -[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: -> -0000000000000000 -> -[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : -> -0000000000000000 -> -[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : -> -ffff0000c0906e80 -> -[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : -> -ffff80000aec3bf0 -> -[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : -> -ffff0000c155a000 -> -[ 534.738878] Call trace: -> -[ 534.739368] 0x0 -> -[ 534.739713] dev_attr_store+0x1c/0x30 -> -[ 534.740186] sysfs_kf_write+0x48/0x58 -> -[ 534.740961] kernfs_fop_write_iter+0x128/0x184 -> -[ 534.741872] new_sync_write+0xdc/0x158 -> -[ 534.742706] vfs_write+0x1ac/0x2a8 -> -[ 534.743440] ksys_write+0x68/0xf0 -> -[ 534.744328] __arm64_sys_write+0x1c/0x28 -> -[ 534.745180] invoke_syscall+0x44/0xf0 -> -[ 534.745989] el0_svc_common+0x4c/0xfc -> -[ 534.746661] do_el0_svc+0x60/0xa8 -> -[ 534.747378] el0_svc+0x2c/0x78 -> -[ 534.748066] el0t_64_sync_handler+0xb8/0x12c -> -[ 534.748919] el0t_64_sync+0x18c/0x190 -> -[ 534.749629] Code: bad PC value -> -[ 534.750169] ---[ end trace 0000000000000000 ]--- -What was the top kernel commit when you ran this test? 
What is the line
-number of "commit_store+0x118"?
-
->
-2. When I want to create x4 region with command: "cxl create-region -d
->
-decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". I got the errors below:
->
->
-cxl region: create_region: region0: failed to set target3 to mem3
->
-cxl region: cmd_create_region: created 0 regions
->
->
-And the kernel log is as below:
->
-[ 60.536663] cxl_region region0: config state: 0
->
-[ 60.536675] cxl_region region0: probe: -6
->
-[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0
->
-[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add:
->
-mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1
->
-[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add:
->
-mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1
->
-[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add:
->
-mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1
->
-[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add:
->
-mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1
->
-[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add:
->
-mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2
->
-[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add:
->
-mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1
->
-[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add:
->
-mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1
->
-[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add:
->
-mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3
->
-[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add:
->
-mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1
->
-[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add:
->
-mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1
->
-[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add:
->
-mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4
->
-[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add:
->
-mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1
->
-[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256
->
-[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0
->
-for mem0:decoder3.0 @ 0
->
-[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512
->
-[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] =
->
-0000:0e:00.0 for mem0:decoder3.0 @ 0
->
-[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at
->
-1
->
->
-I have tried to write sysfs node manually, got the same errors.
->
->
-Hope I can get some help here.
-What is the output of:
-
- cxl list -MDTu -d decoder0.0
-
-...? It might be the case that mem1 cannot be mapped by decoder0.0, or
-at least not in the specified order, or that validation check is broken.
-
-Hi Dan,
-
-Thanks for your reply!
-
-On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> wrote:
->
->
-What is the output of:
->
->
-cxl list -MDTu -d decoder0.0
->
->
-...? It might be the case that mem1 cannot be mapped by decoder0.0, or
->
-at least not in the specified order, or that validation check is broken.
-Command "cxl list -MDTu -d decoder0.0" output: - -[ - { - "memdevs":[ - { - "memdev":"mem2", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:11:00.0" - }, - { - "memdev":"mem1", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:10:00.0" - }, - { - "memdev":"mem0", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:0f:00.0" - }, - { - "memdev":"mem3", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:12:00.0" - } - ] - }, - { - "root decoders":[ - { - "decoder":"decoder0.0", - "resource":"0x10000000000", - "size":"4.00 GiB (4.29 GB)", - "pmem_capable":true, - "volatile_capable":true, - "accelmem_capable":true, - "nr_targets":1, - "targets":[ - { - "target":"ACPI0016:01", - "alias":"pci0000:0c", - "position":0, - "id":"0xc" - } - ] - } - ] - } -] - -Bobo WL wrote: -> -Hi Dan, -> -> -Thanks for your reply! -> -> -On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> wrote: -> -> -> -> What is the output of: -> -> -> -> cxl list -MDTu -d decoder0.0 -> -> -> -> ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> at least not in the specified order, or that validation check is broken. -> -> -Command "cxl list -MDTu -d decoder0.0" output: -Thanks for this, I think I know the problem, but will try some -experiments with cxl_test first. - -Did the commit_store() crash stop reproducing with latest cxl/preview -branch? - -On Tue, Aug 9, 2022 at 11:17 PM Dan Williams <dan.j.williams@intel.com> wrote: -> -> -Bobo WL wrote: -> -> Hi Dan, -> -> -> -> Thanks for your reply! -> -> -> -> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> -> -> wrote: -> -> > -> -> > What is the output of: -> -> > -> -> > cxl list -MDTu -d decoder0.0 -> -> > -> -> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> > at least not in the specified order, or that validation check is broken. -> -> -> -> Command "cxl list -MDTu -d decoder0.0" output: -> -> -Thanks for this, I think I know the problem, but will try some -> -experiments with cxl_test first. -> -> -Did the commit_store() crash stop reproducing with latest cxl/preview -> -branch? -No, still hitting this bug if don't add extra HB device in qemu - -Dan Williams wrote: -> -Bobo WL wrote: -> -> Hi Dan, -> -> -> -> Thanks for your reply! -> -> -> -> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> -> -> wrote: -> -> > -> -> > What is the output of: -> -> > -> -> > cxl list -MDTu -d decoder0.0 -> -> > -> -> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> > at least not in the specified order, or that validation check is broken. -> -> -> -> Command "cxl list -MDTu -d decoder0.0" output: -> -> -Thanks for this, I think I know the problem, but will try some -> -experiments with cxl_test first. -Hmm, so my cxl_test experiment unfortunately passed so I'm not -reproducing the failure mode. 
This is the result of creating x4 region
-with devices directly attached to a single host-bridge:
-
-# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30))
-{
- "region":"region8",
- "resource":"0xf1f0000000",
- "size":"1024.00 MiB (1073.74 MB)",
- "interleave_ways":4,
- "interleave_granularity":256,
- "decode_state":"commit",
- "mappings":[
- {
- "position":3,
- "memdev":"mem11",
- "decoder":"decoder21.0"
- },
- {
- "position":2,
- "memdev":"mem9",
- "decoder":"decoder19.0"
- },
- {
- "position":1,
- "memdev":"mem10",
- "decoder":"decoder20.0"
- },
- {
- "position":0,
- "memdev":"mem12",
- "decoder":"decoder22.0"
- }
- ]
-}
-cxl region: cmd_create_region: created 1 region
-
->
-Did the commit_store() crash stop reproducing with latest cxl/preview
->
-branch?
-I missed the answer to this question.
-
-All of these changes are now in Linus' tree perhaps give that a try and
-post the debug log again?
-
-On Thu, 11 Aug 2022 17:46:55 -0700
-Dan Williams <dan.j.williams@intel.com> wrote:
-
->
-Dan Williams wrote:
-> -> Bobo WL wrote:
-> -> > Hi Dan,
-> -> >
-> -> > Thanks for your reply!
-> -> >
-> -> > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com>
-> -> > wrote:
-> -> > >
-> -> > > What is the output of:
-> -> > >
-> -> > > cxl list -MDTu -d decoder0.0
-> -> > >
-> -> > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or
-> -> > > at least not in the specified order, or that validation check is
-> -> > > broken.
-> -> >
-> -> > Command "cxl list -MDTu -d decoder0.0" output:
-> ->
-> -> Thanks for this, I think I know the problem, but will try some
-> -> experiments with cxl_test first.
->
-Hmm, so my cxl_test experiment unfortunately passed so I'm not
-reproducing the failure mode. This is the result of creating x4 region
-with devices directly attached to a single host-bridge:
->
-# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30))
->
-{
->
-"region":"region8",
->
-"resource":"0xf1f0000000",
->
-"size":"1024.00 MiB (1073.74 MB)",
->
-"interleave_ways":4,
->
-"interleave_granularity":256,
->
-"decode_state":"commit",
->
-"mappings":[
->
-{
->
-"position":3,
->
-"memdev":"mem11",
->
-"decoder":"decoder21.0"
->
-},
->
-{
->
-"position":2,
->
-"memdev":"mem9",
->
-"decoder":"decoder19.0"
->
-},
->
-{
->
-"position":1,
->
-"memdev":"mem10",
->
-"decoder":"decoder20.0"
->
-},
->
-{
->
-"position":0,
->
-"memdev":"mem12",
->
-"decoder":"decoder22.0"
->
-}
->
-]
->
-}
->
-cxl region: cmd_create_region: created 1 region
->
-> -> Did the commit_store() crash stop reproducing with latest cxl/preview
-> -> branch?
->
-I missed the answer to this question.
->
-All of these changes are now in Linus' tree perhaps give that a try and
-post the debug log again?
-Hi Dan,
-
-I've moved onto looking at this one.
-1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy that
-up
-at some stage), 1 switch, 4 downstream switch ports each with a type 3
-
-I'm not getting a crash, but can't successfully set up a region.
-Upon adding the final target
-It's failing in check_last_peer() as pos < distance.
-Seems distance is 4 which makes me think it's using the wrong level of the
-hierarchy for
-some reason or that distance check is wrong.
-Wasn't a good idea to just skip that step though as it goes boom - though
-stack trace is not useful.
-
-Jonathan
-
-On Wed, 17 Aug 2022 17:16:19 +0100
-Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
-
->
-On Thu, 11 Aug 2022 17:46:55 -0700
->
-Dan Williams <dan.j.williams@intel.com> wrote:
->
->
-> Dan Williams wrote:
-> -> > Bobo WL wrote:
-> -> > > Hi Dan,
-> -> > >
-> -> > > Thanks for your reply!
-> -> > >
-> -> > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com>
-> -> > > wrote:
-> -> > > >
-> -> > > > What is the output of:
-> -> > > >
-> -> > > > cxl list -MDTu -d decoder0.0
-> -> > > >
-> -> > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or
-> -> > > > at least not in the specified order, or that validation check is
-> -> > > > broken.
-> -> > >
-> -> > > Command "cxl list -MDTu -d decoder0.0" output:
-> -> >
-> -> > Thanks for this, I think I know the problem, but will try some
-> -> > experiments with cxl_test first.
-> ->
-> -> Hmm, so my cxl_test experiment unfortunately passed so I'm not
-> -> reproducing the failure mode. This is the result of creating x4 region
-> -> with devices directly attached to a single host-bridge:
-> ->
-> -> # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30))
-> -> {
-> -> "region":"region8",
-> -> "resource":"0xf1f0000000",
-> -> "size":"1024.00 MiB (1073.74 MB)",
-> -> "interleave_ways":4,
-> -> "interleave_granularity":256,
-> -> "decode_state":"commit",
-> -> "mappings":[
-> -> {
-> -> "position":3,
-> -> "memdev":"mem11",
-> -> "decoder":"decoder21.0"
-> -> },
-> -> {
-> -> "position":2,
-> -> "memdev":"mem9",
-> -> "decoder":"decoder19.0"
-> -> },
-> -> {
-> -> "position":1,
-> -> "memdev":"mem10",
-> -> "decoder":"decoder20.0"
-> -> },
-> -> {
-> -> "position":0,
-> -> "memdev":"mem12",
-> -> "decoder":"decoder22.0"
-> -> }
-> -> ]
-> -> }
-> -> cxl region: cmd_create_region: created 1 region
-> ->
-> -> > Did the commit_store() crash stop reproducing with latest cxl/preview
-> -> > branch?
-> ->
-> -> I missed the answer to this question.
-> ->
-> -> All of these changes are now in Linus' tree perhaps give that a try and
-> -> post the debug log again?
->
-Hi Dan,
->
-I've moved onto looking at this one.
->
-1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy
->
-that up
->
-at some stage), 1 switch, 4 downstream switch ports each with a type 3
->
->
-I'm not getting a crash, but can't successfully set up a region.
->
-Upon adding the final target
->
-It's failing in check_last_peer() as pos < distance.
->
-Seems distance is 4 which makes me think it's using the wrong level of the
->
-hierarchy for
->
-some reason or that distance check is wrong.
->
-Wasn't a good idea to just skip that step though as it goes boom - though
->
-stack trace is not useful.
-Turns out really weird corruption happens if you accidentally back two type3
-devices
-with the same memory device. Who would have thought it :)
-
-That aside, ignoring the check_last_peer() failure seems to make everything work
-for this
-topology. I'm not seeing the crash, so my guess is we fixed it somewhere along
-the way.
-
-Now for the fun one. I've replicated the crash if we have
-
-1HB 1*RP 1SW, 4SW-DSP, 4Type3
-
-Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be
-programmed
-but the null pointer dereference isn't related to that.
-
-The bug is straightforward. Not all decoders have commit callbacks... Will
-send out
-a possible fix shortly.
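-
-(A sketch of the shape of that fix: the crash log earlier shows commit_store()
-calling through a NULL op, so guard the optional callback before invoking
-it - illustrative only, not the final patch:)
-
-	/* e.g. in commit_store(), before invoking the op (sketch) */
-	if (cxld->commit)
-		rc = cxld->commit(cxld);
-	else
-		rc = 0;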
-
-Jonathan
-
-
-
->
->
-Jonathan
->
->
->
->
->
->
-
-On Thu, 18 Aug 2022 17:37:40 +0100
-Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
-
->
-On Wed, 17 Aug 2022 17:16:19 +0100
->
-Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
->
->
-> On Thu, 11 Aug 2022 17:46:55 -0700
-> -> Dan Williams <dan.j.williams@intel.com> wrote:
-> ->
-> -> > Dan Williams wrote:
-> -> > > Bobo WL wrote:
-> -> > > > Hi Dan,
-> -> > > >
-> -> > > > Thanks for your reply!
-> -> > > >
-> -> > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams
-> -> > > > <dan.j.williams@intel.com> wrote:
-> -> > > > >
-> -> > > > > What is the output of:
-> -> > > > >
-> -> > > > > cxl list -MDTu -d decoder0.0
-> -> > > > >
-> -> > > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0,
-> -> > > > > or
-> -> > > > > at least not in the specified order, or that validation check is
-> -> > > > > broken.
-> -> > > >
-> -> > > > Command "cxl list -MDTu -d decoder0.0" output:
-> -> > >
-> -> > > Thanks for this, I think I know the problem, but will try some
-> -> > > experiments with cxl_test first.
-> -> >
-> -> > Hmm, so my cxl_test experiment unfortunately passed so I'm not
-> -> > reproducing the failure mode. This is the result of creating x4 region
-> -> > with devices directly attached to a single host-bridge:
-> -> >
-> -> > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s
-> -> > $((1<<30))
-> -> > {
-> -> > "region":"region8",
-> -> > "resource":"0xf1f0000000",
-> -> > "size":"1024.00 MiB (1073.74 MB)",
-> -> > "interleave_ways":4,
-> -> > "interleave_granularity":256,
-> -> > "decode_state":"commit",
-> -> > "mappings":[
-> -> > {
-> -> > "position":3,
-> -> > "memdev":"mem11",
-> -> > "decoder":"decoder21.0"
-> -> > },
-> -> > {
-> -> > "position":2,
-> -> > "memdev":"mem9",
-> -> > "decoder":"decoder19.0"
-> -> > },
-> -> > {
-> -> > "position":1,
-> -> > "memdev":"mem10",
-> -> > "decoder":"decoder20.0"
-> -> > },
-> -> > {
-> -> > "position":0,
-> -> > "memdev":"mem12",
-> -> > "decoder":"decoder22.0"
-> -> > }
-> -> > ]
-> -> > }
-> -> > cxl region: cmd_create_region: created 1 region
-> -> >
-> -> > > Did the commit_store() crash stop reproducing with latest cxl/preview
-> -> > > branch?
-> -> >
-> -> > I missed the answer to this question.
-> -> >
-> -> > All of these changes are now in Linus' tree perhaps give that a try and
-> -> > post the debug log again?
-> ->
-> -> Hi Dan,
-> ->
-> -> I've moved onto looking at this one.
-> -> 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy
-> -> that up
-> -> at some stage), 1 switch, 4 downstream switch ports each with a type 3
-> ->
-> -> I'm not getting a crash, but can't successfully set up a region.
-> -> Upon adding the final target
-> -> It's failing in check_last_peer() as pos < distance.
-> -> Seems distance is 4 which makes me think it's using the wrong level of
-> -> the hierarchy for
-> -> some reason or that distance check is wrong.
-> -> Wasn't a good idea to just skip that step though as it goes boom - though
-> -> stack trace is not useful.
->
-Turns out really weird corruption happens if you accidentally back two type3
->
-devices
->
-with the same memory device. Who would have thought it :)
->
->
-That aside, ignoring the check_last_peer() failure seems to make everything
->
-work for this
->
-topology. I'm not seeing the crash, so my guess is we fixed it somewhere
->
-along the way.
->
->
-Now for the fun one.
I've replicated the crash if we have
->
->
-1HB 1*RP 1SW, 4SW-DSP, 4Type3
->
->
-Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be
->
-programmed
->
-but the null pointer dereference isn't related to that.
->
->
-The bug is straightforward. Not all decoders have commit callbacks... Will
->
-send out
->
-a possible fix shortly.
->
-For completeness I'm carrying this hack because I haven't gotten my head
-around the right fix for check_last_peer() failing on this test topology.
-
-diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
-index c49d9a5f1091..275e143bd748 100644
---- a/drivers/cxl/core/region.c
-+++ b/drivers/cxl/core/region.c
-@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
- rc = check_last_peer(cxled, ep, cxl_rr,
- distance);
- if (rc)
-- return rc;
-+ // return rc;
- goto out_target_set;
- }
- goto add_target;
---
-
-I might find more bugs with more testing, but these are all the ones I've
-seen so far + in Bobo's reports. Qemu fixes are now in upstream so
-will be there in the release.
-
-As a reminder, testing on QEMU has a few corners...
-
-Need a patch to add serial number ECAP support. It is on list for review,
-but will have to wait for after QEMU 7.1 release (which may be next week)
-
-QEMU still assumes HDM decoder on the host bridge will be programmed.
-So if you want anything to work there should be at least
-2 RP below the HB (no need to plug anything in to one of them).
-
-I don't want to add a commandline parameter to hide the decoder in QEMU
-and detecting there is only one RP would require moving a bunch of static
-stuff into runtime code (I think).
-
-I still think we should make the kernel check to see if there is a decoder,
-but if not I might see how bad a hack it is to have QEMU ignore that decoder
-if not committed in this one special case (HB HDM decoder with only one place
-it can send stuff). Obviously that would be a break from specification
-so less than ideal!
-
-Thanks,
-
-Jonathan
-
-On Fri, 19 Aug 2022 09:46:55 +0100
-Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
-
->
-On Thu, 18 Aug 2022 17:37:40 +0100
->
-Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
->
->
-> On Wed, 17 Aug 2022 17:16:19 +0100
-> -> Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
-> ->
-> -> > On Thu, 11 Aug 2022 17:46:55 -0700
-> -> > Dan Williams <dan.j.williams@intel.com> wrote:
-> -> >
-> -> > > Dan Williams wrote:
-> -> > > > Bobo WL wrote:
-> -> > > > > Hi Dan,
-> -> > > > >
-> -> > > > > Thanks for your reply!
-> -> > > > >
-> -> > > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams
-> -> > > > > <dan.j.williams@intel.com> wrote:
-> -> > > > > >
-> -> > > > > > What is the output of:
-> -> > > > > >
-> -> > > > > > cxl list -MDTu -d decoder0.0
-> -> > > > > >
-> -> > > > > > ...? It might be the case that mem1 cannot be mapped by
-> -> > > > > > decoder0.0, or
-> -> > > > > > at least not in the specified order, or that validation check is
-> -> > > > > > broken.
-> -> > > > >
-> -> > > > > Command "cxl list -MDTu -d decoder0.0" output:
-> -> > > >
-> -> > > > Thanks for this, I think I know the problem, but will try some
-> -> > > > experiments with cxl_test first.
-> -> > >
-> -> > > Hmm, so my cxl_test experiment unfortunately passed so I'm not
-> -> > > reproducing the failure mode.
This is the result of creating x4 region
-> -> > > with devices directly attached to a single host-bridge:
-> -> > >
-> -> > > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s
-> -> > > $((1<<30))
-> -> > > {
-> -> > > "region":"region8",
-> -> > > "resource":"0xf1f0000000",
-> -> > > "size":"1024.00 MiB (1073.74 MB)",
-> -> > > "interleave_ways":4,
-> -> > > "interleave_granularity":256,
-> -> > > "decode_state":"commit",
-> -> > > "mappings":[
-> -> > > {
-> -> > > "position":3,
-> -> > > "memdev":"mem11",
-> -> > > "decoder":"decoder21.0"
-> -> > > },
-> -> > > {
-> -> > > "position":2,
-> -> > > "memdev":"mem9",
-> -> > > "decoder":"decoder19.0"
-> -> > > },
-> -> > > {
-> -> > > "position":1,
-> -> > > "memdev":"mem10",
-> -> > > "decoder":"decoder20.0"
-> -> > > },
-> -> > > {
-> -> > > "position":0,
-> -> > > "memdev":"mem12",
-> -> > > "decoder":"decoder22.0"
-> -> > > }
-> -> > > ]
-> -> > > }
-> -> > > cxl region: cmd_create_region: created 1 region
-> -> > >
-> -> > > > Did the commit_store() crash stop reproducing with latest cxl/preview
-> -> > > > branch?
-> -> > >
-> -> > > I missed the answer to this question.
-> -> > >
-> -> > > All of these changes are now in Linus' tree perhaps give that a try and
-> -> > > post the debug log again?
-> -> >
-> -> > Hi Dan,
-> -> >
-> -> > I've moved onto looking at this one.
-> -> > 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy
-> -> > that up
-> -> > at some stage), 1 switch, 4 downstream switch ports each with a type 3
-> -> >
-> -> > I'm not getting a crash, but can't successfully set up a region.
-> -> > Upon adding the final target
-> -> > It's failing in check_last_peer() as pos < distance.
-> -> > Seems distance is 4 which makes me think it's using the wrong level of
-> -> > the hierarchy for
-> -> > some reason or that distance check is wrong.
-> -> > Wasn't a good idea to just skip that step though as it goes boom - though
-> -> > stack trace is not useful.
-> ->
-> -> Turns out really weird corruption happens if you accidentally back two
-> -> type3 devices
-> -> with the same memory device. Who would have thought it :)
-> ->
-> -> That aside, ignoring the check_last_peer() failure seems to make everything
-> -> work for this
-> -> topology. I'm not seeing the crash, so my guess is we fixed it somewhere
-> -> along the way.
-> ->
-> -> Now for the fun one. I've replicated the crash if we have
-> ->
-> -> 1HB 1*RP 1SW, 4SW-DSP, 4Type3
-> ->
-> -> Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be
-> -> programmed
-> -> but the null pointer dereference isn't related to that.
-> ->
-> -> The bug is straightforward. Not all decoders have commit callbacks... Will
-> -> send out
-> -> a possible fix shortly.
-> ->
->
-For completeness I'm carrying this hack because I haven't gotten my head
->
-around the right fix for check_last_peer() failing on this test topology.
->
->
-diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
->
-index c49d9a5f1091..275e143bd748 100644
->
---- a/drivers/cxl/core/region.c
->
-+++ b/drivers/cxl/core/region.c
->
-@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
->
-rc = check_last_peer(cxled, ep, cxl_rr,
->
-distance);
->
-if (rc)
->
-- return rc;
->
-+ // return rc;
->
-goto out_target_set;
->
-}
->
-goto add_target;
-I'm still carrying this hack and still haven't worked out the right fix.
-
-Suggestions welcome! If not I'll hopefully get some time on this
-towards the end of the week.
-
-Jonathan
-
diff --git a/classification_output/03/KVM/42613410 b/classification_output/03/KVM/42613410
deleted file mode 100644
index 662d0dfa1..000000000
--- a/classification_output/03/KVM/42613410
+++ /dev/null
@@ -1,152 +0,0 @@
-KVM: 0.381
-other: 0.332
-semantic: 0.327
-mistranslation: 0.314
-instruction: 0.307
-network: 0.284
-boot: 0.187
-
-[Qemu-devel] [PATCH, Bug 1612908] scripts: Add TCP endpoints for qom-* scripts
-
-From: Carl Allendorph <address@hidden>
-
-I've created a patch for bug #1612908. The current docs for the scripts
-in the "scripts/qmp/" directory suggest that both unix sockets and
-tcp endpoints can be used. The TCP endpoints don't work for most of the
-scripts, with the notable exception of 'qmp-shell'. This patch attempts to
-refactor the process of distinguishing between unix path endpoints and
-tcp endpoints to work for all of these scripts.
-
-Carl Allendorph (1):
- scripts: Add ability for qom-* python scripts to target tcp endpoints
-
- scripts/qmp/qmp-shell | 22 ++--------------------
- scripts/qmp/qmp.py | 23 ++++++++++++++++++++---
- 2 files changed, 22 insertions(+), 23 deletions(-)
-
---
-2.7.4
-
-From: Carl Allendorph <address@hidden>
-
-The current code for QEMUMonitorProtocol accepts both a unix socket
-endpoint as a string and a tcp endpoint as a tuple. Most of the scripts
-that use this class don't massage the command line argument to generate
-a tuple. This patch refactors qmp-shell slightly to reuse the existing
-parsing of the "host:port" string for all the qom-* scripts.
-
-Signed-off-by: Carl Allendorph <address@hidden>
----
- scripts/qmp/qmp-shell | 22 ++--------------------
- scripts/qmp/qmp.py | 23 ++++++++++++++++++++---
- 2 files changed, 22 insertions(+), 23 deletions(-)
-
-diff --git a/scripts/qmp/qmp-shell b/scripts/qmp/qmp-shell
-index 0373b24..8a2a437 100755
---- a/scripts/qmp/qmp-shell
-+++ b/scripts/qmp/qmp-shell
-@@ -83,9 +83,6 @@ class QMPCompleter(list):
- class QMPShellError(Exception):
- pass
-
--class QMPShellBadPort(QMPShellError):
-- pass
--
- class FuzzyJSON(ast.NodeTransformer):
- '''This extension of ast.NodeTransformer filters literal "true/false/null"
- values in an AST and replaces them by proper "True/False/None" values that
-@@ -103,28 +100,13 @@ class FuzzyJSON(ast.NodeTransformer):
- # _execute_cmd()). Let's design a better one.
- class QMPShell(qmp.QEMUMonitorProtocol):
- def __init__(self, address, pretty=False):
-- qmp.QEMUMonitorProtocol.__init__(self, self.__get_address(address))
-+ qmp.QEMUMonitorProtocol.__init__(self, address)
- self._greeting = None
- self._completer = None
- self._pretty = pretty
- self._transmode = False
- self._actions = list()
-
-- def __get_address(self, arg):
-- """
-- Figure out if the argument is in the port:host form, if it's not it's
-- probably a file path.
-- """ -- addr = arg.split(':') -- if len(addr) == 2: -- try: -- port = int(addr[1]) -- except ValueError: -- raise QMPShellBadPort -- return ( addr[0], port ) -- # socket path -- return arg -- - def _fill_completion(self): - for cmd in self.cmd('query-commands')['return']: - self._completer.append(cmd['name']) -@@ -400,7 +382,7 @@ def main(): - - if qemu is None: - fail_cmdline() -- except QMPShellBadPort: -+ except qmp.QMPShellBadPort: - die('bad port number in command-line') - - try: -diff --git a/scripts/qmp/qmp.py b/scripts/qmp/qmp.py -index 62d3651..261ece8 100644 ---- a/scripts/qmp/qmp.py -+++ b/scripts/qmp/qmp.py -@@ -25,21 +25,23 @@ class QMPCapabilitiesError(QMPError): - class QMPTimeoutError(QMPError): - pass - -+class QMPShellBadPort(QMPError): -+ pass -+ - class QEMUMonitorProtocol: - def __init__(self, address, server=False, debug=False): - """ - Create a QEMUMonitorProtocol class. - - @param address: QEMU address, can be either a unix socket path (string) -- or a tuple in the form ( address, port ) for a TCP -- connection -+ or a TCP endpoint (string in the format "host:port") - @param server: server mode listens on the socket (bool) - @raise socket.error on socket connection errors - @note No connection is established, this is done by the connect() or - accept() methods - """ - self.__events = [] -- self.__address = address -+ self.__address = self.__get_address(address) - self._debug = debug - self.__sock = self.__get_sock() - if server: -@@ -47,6 +49,21 @@ class QEMUMonitorProtocol: - self.__sock.bind(self.__address) - self.__sock.listen(1) - -+ def __get_address(self, arg): -+ """ -+ Figure out if the argument is in the port:host form, if it's not it's -+ probably a file path. -+ """ -+ addr = arg.split(':') -+ if len(addr) == 2: -+ try: -+ port = int(addr[1]) -+ except ValueError: -+ raise QMPShellBadPort -+ return ( addr[0], port ) -+ # socket path -+ return arg -+ - def __get_sock(self): - if isinstance(self.__address, tuple): - family = socket.AF_INET --- -2.7.4 - diff --git a/classification_output/03/KVM/43643137 b/classification_output/03/KVM/43643137 deleted file mode 100644 index 27cf0401f..000000000 --- a/classification_output/03/KVM/43643137 +++ /dev/null @@ -1,541 +0,0 @@ -KVM: 0.794 -other: 0.781 -semantic: 0.764 -instruction: 0.754 -network: 0.709 -mistranslation: 0.665 -boot: 0.652 - -[Qemu-devel] [BUG/RFC] INIT IPI lost when VM starts - -Hi, -We encountered a problem that when a domain starts, seabios failed to online a -vCPU. - -After investigation, we found that the reason is in kvm-kmod, KVM_APIC_INIT bit -in -vcpu->arch.apic->pending_events was overwritten by qemu, and thus an INIT IPI -sent -to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp command -to qemu -on VM start. - -In qemu, qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> -do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from kvm-kmod and -sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call -kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus pending_events is -overwritten by qemu. - -I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true after -âquery-cpusâ, -and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am not sure -whether -it is OK for qemu to set cpu->kvm_vcpu_dirty in do_kvm_cpu_synchronize_state in -each caller. - -Whatâs your opinion? - -Let me clarify it more clearly. 
Time sequence is that qemu handles "query-cpus" qmp
-command, vcpu 1 (and vcpu 0) got registers from kvm-kmod (qmp_query_cpus->
-cpu_synchronize_state-> kvm_cpu_synchronize_state->
-> do_kvm_cpu_synchronize_state-> kvm_arch_get_registers), then vcpu 0 (BSP)
-sends INIT-SIPI to vcpu 1(AP). In kvm-kmod, vcpu 1's pending_events gets its
-KVM_APIC_INIT bit set.
-Then vcpu 1 continues running, the vcpu1 thread in qemu calls
-kvm_arch_put_registers-> kvm_put_vcpu_events, so the KVM_APIC_INIT bit in vcpu 1's
-pending_events got cleared, i.e., lost.
-
-In kvm-kmod, except for pending_events, sipi_vector may also be overwritten,
-so I am not sure if there are other fields/registers in danger, i.e., those
-that may
-be modified asynchronously with respect to the vcpu thread itself.
-
-BTW, using a sleep like the following can reliably reproduce this problem, if
-the VM is
-equipped with more than 2 vcpus and the VM is started using libvirtd.
-
-diff --git a/target/i386/kvm.c b/target/i386/kvm.c
-index 55865db..5099290 100644
---- a/target/i386/kvm.c
-+++ b/target/i386/kvm.c
-@@ -2534,6 +2534,11 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
- KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
- }
-
-+ if (CPU(cpu)->cpu_index == 1) {
-+ fprintf(stderr, "vcpu 1 sleep!!!!\n");
-+ sleep(10);
-+ }
-+
- return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
- }
-
-
-On 2017/3/20 22:21, Herongguang (Stephen) wrote:
-Hi,
-We encountered a problem that when a domain starts, seabios failed to online a
-vCPU.
-
-After investigation, we found that the reason is in kvm-kmod, KVM_APIC_INIT bit
-in
-vcpu->arch.apic->pending_events was overwritten by qemu, and thus an INIT IPI
-sent
-to AP was lost. Qemu does this since libvirtd sends a "query-cpus" qmp command
-to qemu
-on VM start.
-
-In qemu, qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state->
-do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from kvm-kmod and
-sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call
-kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus pending_events is
-overwritten by qemu.
-
-I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true after
-"query-cpus",
-and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am not sure
-whether
-it is OK for qemu to set cpu->kvm_vcpu_dirty in do_kvm_cpu_synchronize_state in
-each caller.
-
-What's your opinion?
-Hi Rongguang,
-
-sorry for the late response.
-
-Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear the
-bit, but the result of the INIT is stored in mp_state.
-
-kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves
-KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes
-it back. Maybe it should ignore events.smi.latched_init if not in SMM,
-but I would like to understand the exact sequence of events.
-
-Thanks,
-
-paolo
-
-On 2017/4/6 0:16, Paolo Bonzini wrote:
-On 20/03/2017 15:21, Herongguang (Stephen) wrote:
-We encountered a problem that when a domain starts, seabios failed to
-online a vCPU.
-
-After investigation, we found that the reason is in kvm-kmod,
-KVM_APIC_INIT bit in
-vcpu->arch.apic->pending_events was overwritten by qemu, and thus an
-INIT IPI sent
-to AP was lost. Qemu does this since libvirtd sends a "query-cpus" qmp
-command to qemu
-on VM start.
-
-In qemu, qmp_query_cpus-> cpu_synchronize_state->
-kvm_cpu_synchronize_state->
-do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from
-kvm-kmod and
-sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call
-kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus
-pending_events is
-overwritten by qemu.
-
-I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true
-after "query-cpus",
-and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am
-not sure whether
-it is OK for qemu to set cpu->kvm_vcpu_dirty in
-do_kvm_cpu_synchronize_state in each caller.
-
-What's your opinion?
-Hi Rongguang,
-
-sorry for the late response.
-
-Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear the
-bit, but the result of the INIT is stored in mp_state.
-It's dropped in KVM_SET_VCPU_EVENTS, see below.
-kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves
-KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes
-it back. Maybe it should ignore events.smi.latched_init if not in SMM,
-but I would like to understand the exact sequence of events.
-time0:
-vcpu1:
-qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state->
-> do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to true)->
-kvm_arch_get_registers(KVM_APIC_INIT bit in vcpu->arch.apic->pending_events was not set)
-
-time1:
-vcpu0:
-send INIT-SIPI to all AP->(in vcpu 0's context)__apic_accept_irq(KVM_APIC_INIT bit
-in vcpu1's arch.apic->pending_events is set)
-
-time2:
-vcpu1:
-kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is
-true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten KVM_APIC_INIT bit in
-vcpu->arch.apic->pending_events!)
-
-So it's a race between vcpu1 get/put registers with kvm/other vcpus changing
-vcpu1's status/structure fields in the meantime; I worry that other fields may
-be overwritten as well,
-sipi_vector is one.
-
-also see:
-https://www.mail-archive.com/address@hidden/msg438675.html
-Thanks,
-
-paolo
-
-.
-
-Hi Paolo,
-
-What's your opinion about this patch? We found it just before finishing patches
-for the past two days.
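-
-(One possible kmod-side direction for the clobber described above - a
-sketch, assuming the handling sits in kvm_vcpu_ioctl_x86_set_vcpu_events():
-only rewrite the latched INIT bit when userspace explicitly marks the SMM
-block valid, so a racing INIT cannot be cleared by a stale GET/PUT cycle:)
-
-	if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
-		if (lapic_in_kernel(vcpu)) {
-			if (events->smi.latched_init)
-				set_bit(KVM_APIC_INIT,
-					&vcpu->arch.apic->pending_events);
-			else
-				clear_bit(KVM_APIC_INIT,
-					  &vcpu->arch.apic->pending_events);
-		}
-	}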
-
-Thanks,
--Gonglei
-
-
-> -----Original Message-----
-> From: address@hidden [
mailto:address@hidden
-On
-> Behalf Of Herongguang (Stephen)
-> Sent: Thursday, April 06, 2017 9:47 AM
-> To: Paolo Bonzini; address@hidden; address@hidden;
-> address@hidden; address@hidden; address@hidden;
-> wangxin (U); Huangweidong (C)
-> Subject: Re: [BUG/RFC] INIT IPI lost when VM starts
->
->
->
-> On 2017/4/6 0:16, Paolo Bonzini wrote:
-> >
-> > On 20/03/2017 15:21, Herongguang (Stephen) wrote:
-> >> We encountered a problem that when a domain starts, seabios failed to
-> >> online a vCPU.
-> >>
-> >> After investigation, we found that the reason is in kvm-kmod,
-> >> KVM_APIC_INIT bit in
-> >> vcpu->arch.apic->pending_events was overwritten by qemu, and thus an
-> >> INIT IPI sent
-> >> to AP was lost. Qemu does this since libvirtd sends a "query-cpus" qmp
-> >> command to qemu
-> >> on VM start.
-> >>
-> >> In qemu, qmp_query_cpus-> cpu_synchronize_state->
-> >> kvm_cpu_synchronize_state->
-> >> do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from
-> >> kvm-kmod and
-> >> sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call
-> >> kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus
-> >> pending_events is
-> >> overwritten by qemu.
-> >>
-> >> I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true
-> >> after "query-cpus",
-> >> and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am
-> >> not sure whether
-> >> it is OK for qemu to set cpu->kvm_vcpu_dirty in
-> >> do_kvm_cpu_synchronize_state in each caller.
-> >>
-> >> What's your opinion?
-> > Hi Rongguang,
-> >
-> > sorry for the late response.
-> >
-> > Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear
-> the
-> > bit, but the result of the INIT is stored in mp_state.
->
-> It's dropped in KVM_SET_VCPU_EVENTS, see below.
->
-> >
-> > kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves
-> > KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes
-> > it back. Maybe it should ignore events.smi.latched_init if not in SMM,
-> > but I would like to understand the exact sequence of events.
->
-> time0:
-> vcpu1:
-> qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state->
-> > do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to
-> true)-> kvm_arch_get_registers(KVM_APIC_INIT bit in
-> vcpu->arch.apic->pending_events was not set)
->
-> time1:
-> vcpu0:
-> send INIT-SIPI to all AP->(in vcpu 0's
-> context)__apic_accept_irq(KVM_APIC_INIT bit in vcpu1's
-> arch.apic->pending_events is set)
->
-> time2:
-> vcpu1:
-> kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is
-> true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten
-> KVM_APIC_INIT bit in vcpu->arch.apic->pending_events!)
->
-> So it's a race between vcpu1 get/put registers with kvm/other vcpus changing
-> vcpu1's status/structure fields in the meantime; I worry that other fields
-> may
-> be overwritten as well,
-> sipi_vector is one.
->
-> also see:
-> https://www.mail-archive.com/address@hidden/msg438675.html
->
-> > Thanks,
-> >
-> > paolo
-> >
-> > .
-> >
->
-
-2017-11-20 06:57+0000, Gonglei (Arei):
-> Hi Paolo,
->
-> What's your opinion about this patch? We found it just before finishing
-> patches
-> for the past two days.
-I think your case was fixed by f4ef19108608 ("KVM: X86: Fix loss of
-pending INIT due to race"), but that patch didn't fix it perfectly, so
-maybe you're hitting a similar case that happens in SMM ...
-
-> -----Original Message-----
-> From: address@hidden [
mailto:address@hidden
-On
-> Behalf Of Herongguang (Stephen)
-> On 2017/4/6 0:16, Paolo Bonzini wrote:
-> > Hi Rongguang,
-> >
-> > sorry for the late response.
-> >
-> > Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear
-> the
-> > bit, but the result of the INIT is stored in mp_state.
->
-> It's dropped in KVM_SET_VCPU_EVENTS, see below.
->
-> >
-> > kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves
-> > KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes
-> > it back. Maybe it should ignore events.smi.latched_init if not in SMM,
-> > but I would like to understand the exact sequence of events.
->
-> time0:
-> vcpu1:
-> qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state->
-> > do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to
-> true)-> kvm_arch_get_registers(KVM_APIC_INIT bit in
-> vcpu->arch.apic->pending_events was not set)
->
-> time1:
-> vcpu0:
-> send INIT-SIPI to all AP->(in vcpu 0's
-> context)__apic_accept_irq(KVM_APIC_INIT bit in vcpu1's
-> arch.apic->pending_events is set)
->
-> time2:
-> vcpu1:
-> kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is
-> true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten
-> KVM_APIC_INIT bit in vcpu->arch.apic->pending_events!)
->
-> So it's a race between vcpu1 get/put registers with kvm/other vcpus changing
-> vcpu1's status/structure fields in the meantime; I worry that other fields
-> may
-> be overwritten as well,
-> sipi_vector is one.
-Fields that can be asynchronously written by other VCPUs (like SIPI,
-NMI) must not be SET if other VCPUs were not paused since the last GET.
-(Looking at the interface, we can currently lose pending SMI.)
-
-INIT is one of the restricted fields, but the API unconditionally
-couples SMM with latched INIT, which means that we can lose an INIT if
-the VCPU is in SMM mode -- do you see SMM in kvm_vcpu_events?
-
-Thanks.
-
diff --git a/classification_output/03/KVM/55961334 b/classification_output/03/KVM/55961334
deleted file mode 100644
index a097dd25a..000000000
--- a/classification_output/03/KVM/55961334
+++ /dev/null
@@ -1,42 +0,0 @@
-KVM: 0.881
-instruction: 0.803
-semantic: 0.775
-mistranslation: 0.718
-other: 0.715
-network: 0.697
-boot: 0.569
-
-[Bug] "-ht" flag ignored under KVM - guest still reports HT
-
-Hi Community,
-We have observed that the 'ht' feature bit cannot be disabled when QEMU runs
-with KVM acceleration.
-qemu-system-x86_64 \
- --enable-kvm \
- -machine q35 \
- -cpu host,-ht \
- -smp 4 \
- -m 4G \
- -drive file=rootfs.img,format=raw \
- -nographic \
- -append 'console=ttyS0 root=/dev/sda rw'
-Because '-ht' is specified, the guest should expose no HT capability
-(cpuid.1.edx[28] = 0), and /proc/cpuinfo shouldn't show the HT feature, but we still
-saw ht in the linux guest when running 'cat /proc/cpuinfo'.
-XiaoYao mentioned that:
-
-It has been the behavior of QEMU since
-
- commit 400281af34e5ee6aa9f5496b53d8f82c6fef9319
- Author: Andre Przywara <andre.przywara@amd.com>
- Date: Wed Aug 19 15:42:42 2009 +0200
-
- set CPUID bits to present cores and threads topology
-
-that we cannot remove the HT CPUID bit from the guest via "-cpu xxx,-ht" if the
-VM has >= 2 vcpus.
-I'd like to know whether there's a plan to address this issue, or if the current
-behaviour is considered acceptable.
-Best regards,
-Ewan.
-
diff --git a/classification_output/03/KVM/71456293 b/classification_output/03/KVM/71456293
deleted file mode 100644
index e8ded0bbc..000000000
--- a/classification_output/03/KVM/71456293
+++ /dev/null
@@ -1,1489 +0,0 @@
-KVM: 0.691
-mistranslation: 0.659
-instruction: 0.624
-semantic: 0.600
-other: 0.598
-boot: 0.598
-network: 0.491
-
-[Qemu-devel][bug] qemu crash when migrate vm and vm's disks
-
-When migrating a vm and the vm's disks, the target host qemu crashes due to an
-invalid free.
-#0 object_unref (obj=0x1000) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
-#1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
-at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
-#2 flatview_destroy (view=0x560439653880) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
-#3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
-at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
-#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
-#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
-Tested on base qemu-2.12.0, but the latest qemu (v6.0.0-rc2) also reproduces it.
-The following patch can resolve this problem:
-https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
-Steps to reproduce:
-(1) Create VM (virsh define)
-(2) Add 64 virtio scsi disks
-(3) migrate vm and vm's disks
--------------------------------------------------------------------------------------------------------------------------------------
-This e-mail and its attachments contain confidential information from New H3C, which is
-intended only for the person or entity whose address is listed above. Any use of the
-information contained herein in any way (including, but not limited to, total or partial
-disclosure, reproduction, or dissemination) by persons other than the intended
-recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender
-by phone or email immediately and delete it!
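-
-(Context for the backtrace above: FlatView teardown is deferred to the RCU
-thread, roughly as in the simplified sketch below, so every MemoryRegion a
-view references must stay alive until the grace period ends. If a device's
-regions are finalized before that, the deferred unref inside
-flatview_destroy() is exactly where it blows up. Sketch of the pattern in
-memory.c, names approximate across QEMU versions:)
-
-    void flatview_unref(FlatView *view)
-    {
-        if (atomic_fetch_dec(&view->ref) == 1) {
-            /* runs later on the rcu thread and unrefs each
-             * FlatRange's MemoryRegion */
-            call_rcu(view, flatview_destroy, rcu);
-        }
-    }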
- -> -As follow patch can resolve this problem: -> -https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html -That's a pci/rcu change; ccing Paolo and Micahel. - -> -Steps to reproduce: -> -(1) Create VM (virsh define) -> -(2) Add 64 virtio scsi disks -Is that hot adding the disks later, or are they included in the VM at -creation? -Can you provide a libvirt XML example? - -> -(3) migrate vm and vmâdisks -What do you mean by 'and vm disks' - are you doing a block migration? - -Dave - -> -------------------------------------------------------------------------------------------------------------------------------------- -> -æ¬é®ä»¶åå ¶é件嫿æ°åä¸éå¢çä¿å¯ä¿¡æ¯ï¼ä» éäºåéç»ä¸é¢å°åä¸ååº -> -ç个人æç¾¤ç»ãç¦æ¢ä»»ä½å ¶ä»äººä»¥ä»»ä½å½¢å¼ä½¿ç¨ï¼å æ¬ä½ä¸éäºå ¨é¨æé¨åå°æ³é²ãå¤å¶ã -> -ææ£åï¼æ¬é®ä»¶ä¸çä¿¡æ¯ã妿æ¨éæ¶äºæ¬é®ä»¶ï¼è¯·æ¨ç«å³çµè¯æé®ä»¶éç¥å件人并å 餿¬ -> -é®ä»¶ï¼ -> -This e-mail and its attachments contain confidential information from New -> -H3C, which is -> -intended only for the person or entity whose address is listed above. Any use -> -of the -> -information contained herein in any way (including, but not limited to, total -> -or partial -> -disclosure, reproduction, or dissemination) by persons other than the intended -> -recipient(s) is prohibited. If you receive this e-mail in error, please -> -notify the sender -> -by phone or email immediately and delete it! --- -Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK - -> ------é®ä»¶åä»¶----- -> -å件人: Dr. David Alan Gilbert [ -mailto:dgilbert@redhat.com -] -> -åéæ¶é´: 2021å¹´4æ8æ¥ 19:27 -> -æ¶ä»¶äºº: yuchen (Cloud) <yu.chen@h3c.com>; pbonzini@redhat.com; -> -mst@redhat.com -> -æé: qemu-devel@nongnu.org -> -主é¢: Re: [Qemu-devel][bug] qemu crash when migrate vm and vm's disks -> -> -* Yuchen (yu.chen@h3c.com) wrote: -> -> When migrate vm and vmâs disks target host qemu crash due to an invalid -> -free. -> -> -> -> #0 object_unref (obj=0x1000) at -> -> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 -> -> #1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>) -> -> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 -> -> #2 flatview_destroy (view=0x560439653880) at -> -> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 -> -> #3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>) -> -> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 -> -> #4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 -> -> #5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 -> -> -> -> test base qemu-2.12.0ï¼but use lastest qemu(v6.0.0-rc2) also reproduce. -> -> -Interesting. -> -> -> As follow patch can resolve this problem: -> -> -https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html -> -> -That's a pci/rcu change; ccing Paolo and Micahel. -> -> -> Steps to reproduce: -> -> (1) Create VM (virsh define) -> -> (2) Add 64 virtio scsi disks -> -> -Is that hot adding the disks later, or are they included in the VM at -> -creation? -> -Can you provide a libvirt XML example? 
-> -Include disks in the VM at creation - -vm disks xml (only virtio scsi disks): - <devices> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native'/> - <source file='/vms/tempp/vm-os'/> - <target dev='vda' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x08' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data1'/> - <target dev='sda' bus='scsi'/> - <address type='drive' controller='2' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data2'/> - <target dev='sdb' bus='scsi'/> - <address type='drive' controller='3' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data3'/> - <target dev='sdc' bus='scsi'/> - <address type='drive' controller='4' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data4'/> - <target dev='sdd' bus='scsi'/> - <address type='drive' controller='5' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data5'/> - <target dev='sde' bus='scsi'/> - <address type='drive' controller='6' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data6'/> - <target dev='sdf' bus='scsi'/> - <address type='drive' controller='7' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data7'/> - <target dev='sdg' bus='scsi'/> - <address type='drive' controller='8' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data8'/> - <target dev='sdh' bus='scsi'/> - <address type='drive' controller='9' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data9'/> - <target dev='sdi' bus='scsi'/> - <address type='drive' controller='10' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data10'/> - <target dev='sdj' bus='scsi'/> - <address type='drive' controller='11' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data11'/> - <target dev='sdk' bus='scsi'/> - <address type='drive' controller='12' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data12'/> - <target dev='sdl' bus='scsi'/> - <address 
type='drive' controller='13' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data13'/> - <target dev='sdm' bus='scsi'/> - <address type='drive' controller='14' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data14'/> - <target dev='sdn' bus='scsi'/> - <address type='drive' controller='15' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data15'/> - <target dev='sdo' bus='scsi'/> - <address type='drive' controller='16' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data16'/> - <target dev='sdp' bus='scsi'/> - <address type='drive' controller='17' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data17'/> - <target dev='sdq' bus='scsi'/> - <address type='drive' controller='18' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data18'/> - <target dev='sdr' bus='scsi'/> - <address type='drive' controller='19' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data19'/> - <target dev='sds' bus='scsi'/> - <address type='drive' controller='20' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data20'/> - <target dev='sdt' bus='scsi'/> - <address type='drive' controller='21' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data21'/> - <target dev='sdu' bus='scsi'/> - <address type='drive' controller='22' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data22'/> - <target dev='sdv' bus='scsi'/> - <address type='drive' controller='23' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data23'/> - <target dev='sdw' bus='scsi'/> - <address type='drive' controller='24' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data24'/> - <target dev='sdx' bus='scsi'/> - <address type='drive' controller='25' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data25'/> - <target dev='sdy' bus='scsi'/> - <address 
type='drive' controller='26' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data26'/> - <target dev='sdz' bus='scsi'/> - <address type='drive' controller='27' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data27'/> - <target dev='sdaa' bus='scsi'/> - <address type='drive' controller='28' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data28'/> - <target dev='sdab' bus='scsi'/> - <address type='drive' controller='29' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data29'/> - <target dev='sdac' bus='scsi'/> - <address type='drive' controller='30' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data30'/> - <target dev='sdad' bus='scsi'/> - <address type='drive' controller='31' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data31'/> - <target dev='sdae' bus='scsi'/> - <address type='drive' controller='32' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data32'/> - <target dev='sdaf' bus='scsi'/> - <address type='drive' controller='33' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data33'/> - <target dev='sdag' bus='scsi'/> - <address type='drive' controller='34' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data34'/> - <target dev='sdah' bus='scsi'/> - <address type='drive' controller='35' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data35'/> - <target dev='sdai' bus='scsi'/> - <address type='drive' controller='36' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data36'/> - <target dev='sdaj' bus='scsi'/> - <address type='drive' controller='37' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data37'/> - <target dev='sdak' bus='scsi'/> - <address type='drive' controller='38' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data38'/> - <target dev='sdal' bus='scsi'/> - 
<address type='drive' controller='39' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data39'/> - <target dev='sdam' bus='scsi'/> - <address type='drive' controller='40' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data40'/> - <target dev='sdan' bus='scsi'/> - <address type='drive' controller='41' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data41'/> - <target dev='sdao' bus='scsi'/> - <address type='drive' controller='42' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data42'/> - <target dev='sdap' bus='scsi'/> - <address type='drive' controller='43' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data43'/> - <target dev='sdaq' bus='scsi'/> - <address type='drive' controller='44' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data44'/> - <target dev='sdar' bus='scsi'/> - <address type='drive' controller='45' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data45'/> - <target dev='sdas' bus='scsi'/> - <address type='drive' controller='46' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data46'/> - <target dev='sdat' bus='scsi'/> - <address type='drive' controller='47' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data47'/> - <target dev='sdau' bus='scsi'/> - <address type='drive' controller='48' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data48'/> - <target dev='sdav' bus='scsi'/> - <address type='drive' controller='49' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data49'/> - <target dev='sdaw' bus='scsi'/> - <address type='drive' controller='50' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data50'/> - <target dev='sdax' bus='scsi'/> - <address type='drive' controller='51' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data51'/> - <target dev='sday' 
bus='scsi'/> - <address type='drive' controller='52' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data52'/> - <target dev='sdaz' bus='scsi'/> - <address type='drive' controller='53' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data53'/> - <target dev='sdba' bus='scsi'/> - <address type='drive' controller='54' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data54'/> - <target dev='sdbb' bus='scsi'/> - <address type='drive' controller='55' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data55'/> - <target dev='sdbc' bus='scsi'/> - <address type='drive' controller='56' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data56'/> - <target dev='sdbd' bus='scsi'/> - <address type='drive' controller='57' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data57'/> - <target dev='sdbe' bus='scsi'/> - <address type='drive' controller='58' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data58'/> - <target dev='sdbf' bus='scsi'/> - <address type='drive' controller='59' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data59'/> - <target dev='sdbg' bus='scsi'/> - <address type='drive' controller='60' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data60'/> - <target dev='sdbh' bus='scsi'/> - <address type='drive' controller='61' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data61'/> - <target dev='sdbi' bus='scsi'/> - <address type='drive' controller='62' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data62'/> - <target dev='sdbj' bus='scsi'/> - <address type='drive' controller='63' bus='0' target='0' unit='0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data63'/> - <target dev='sdbk' bus='scsi'/> - <address type='drive' controller='64' bus='0' target='0' unit='0'/> - </disk> - <controller type='scsi' index='0'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x02' -function='0x0'/> - </controller> - <controller type='scsi' index='1' 
model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x06' -function='0x0'/> - </controller> - <controller type='scsi' index='2' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x01' -function='0x0'/> - </controller> - <controller type='scsi' index='3' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x03' -function='0x0'/> - </controller> - <controller type='scsi' index='4' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x04' -function='0x0'/> - </controller> - <controller type='scsi' index='5' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x05' -function='0x0'/> - </controller> - <controller type='scsi' index='6' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x06' -function='0x0'/> - </controller> - <controller type='scsi' index='7' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x07' -function='0x0'/> - </controller> - <controller type='scsi' index='8' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x08' -function='0x0'/> - </controller> - <controller type='scsi' index='9' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x09' -function='0x0'/> - </controller> - <controller type='scsi' index='10' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' -function='0x0'/> - </controller> - <controller type='scsi' index='11' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' -function='0x0'/> - </controller> - <controller type='scsi' index='12' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' -function='0x0'/> - </controller> - <controller type='scsi' index='13' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' -function='0x0'/> - </controller> - <controller type='scsi' index='14' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' -function='0x0'/> - </controller> - <controller type='scsi' index='15' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' -function='0x0'/> - </controller> - <controller type='scsi' index='16' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x10' -function='0x0'/> - </controller> - <controller type='scsi' index='17' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x11' -function='0x0'/> - </controller> - <controller type='scsi' index='18' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x12' -function='0x0'/> - </controller> - <controller type='scsi' index='19' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x13' -function='0x0'/> - </controller> - <controller type='scsi' index='20' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x14' -function='0x0'/> - </controller> - <controller type='scsi' index='21' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x15' -function='0x0'/> - </controller> - <controller type='scsi' index='22' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x16' -function='0x0'/> - </controller> - <controller type='scsi' index='23' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x17' -function='0x0'/> - </controller> - <controller type='scsi' index='24' model='virtio-scsi'> - <address type='pci' 
domain='0x0000' bus='0x01' slot='0x18' -function='0x0'/> - </controller> - <controller type='scsi' index='25' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x19' -function='0x0'/> - </controller> - <controller type='scsi' index='26' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' -function='0x0'/> - </controller> - <controller type='scsi' index='27' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' -function='0x0'/> - </controller> - <controller type='scsi' index='28' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' -function='0x0'/> - </controller> - <controller type='scsi' index='29' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' -function='0x0'/> - </controller> - <controller type='scsi' index='30' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' -function='0x0'/> - </controller> - <controller type='scsi' index='31' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x01' -function='0x0'/> - </controller> - <controller type='scsi' index='32' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x02' -function='0x0'/> - </controller> - <controller type='scsi' index='33' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x03' -function='0x0'/> - </controller> - <controller type='scsi' index='34' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x04' -function='0x0'/> - </controller> - <controller type='scsi' index='35' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x05' -function='0x0'/> - </controller> - <controller type='scsi' index='36' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x06' -function='0x0'/> - </controller> - <controller type='scsi' index='37' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x07' -function='0x0'/> - </controller> - <controller type='scsi' index='38' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x08' -function='0x0'/> - </controller> - <controller type='scsi' index='39' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x09' -function='0x0'/> - </controller> - <controller type='scsi' index='40' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' -function='0x0'/> - </controller> - <controller type='scsi' index='41' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' -function='0x0'/> - </controller> - <controller type='scsi' index='42' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x0c' -function='0x0'/> - </controller> - <controller type='scsi' index='43' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x02' slot='0x0d' -function='0x0'/> - </controller> - <controller type='scsi' index='44' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x03' -function='0x0'/> - </controller> - <controller type='scsi' index='45' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x09' -function='0x0'/> - </controller> - <controller type='scsi' index='46' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' -function='0x0'/> - </controller> - <controller type='scsi' index='47' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' 
slot='0x0c' -function='0x0'/> - </controller> - <controller type='scsi' index='48' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' -function='0x0'/> - </controller> - <controller type='scsi' index='49' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' -function='0x0'/> - </controller> - <controller type='scsi' index='50' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' -function='0x0'/> - </controller> - <controller type='scsi' index='51' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x10' -function='0x0'/> - </controller> - <controller type='scsi' index='52' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x11' -function='0x0'/> - </controller> - <controller type='scsi' index='53' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x12' -function='0x0'/> - </controller> - <controller type='scsi' index='54' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x13' -function='0x0'/> - </controller> - <controller type='scsi' index='55' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x14' -function='0x0'/> - </controller> - <controller type='scsi' index='56' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x15' -function='0x0'/> - </controller> - <controller type='scsi' index='57' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x16' -function='0x0'/> - </controller> - <controller type='scsi' index='58' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x17' -function='0x0'/> - </controller> - <controller type='scsi' index='59' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x18' -function='0x0'/> - </controller> - <controller type='scsi' index='60' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x19' -function='0x0'/> - </controller> - <controller type='scsi' index='61' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' -function='0x0'/> - </controller> - <controller type='scsi' index='62' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' -function='0x0'/> - </controller> - <controller type='scsi' index='63' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' -function='0x0'/> - </controller> - <controller type='scsi' index='64' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' -function='0x0'/> - </controller> - <controller type='pci' index='0' model='pci-root'/> - <controller type='pci' index='1' model='pci-bridge'> - <model name='pci-bridge'/> - <target chassisNr='1'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' -function='0x0'/> - </controller> - <controller type='pci' index='2' model='pci-bridge'> - <model name='pci-bridge'/> - <target chassisNr='2'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1f' -function='0x0'/> - </controller> - </devices> - -vm disks xml (only virtio disks): - <devices> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native'/> - <source file='/vms/tempp/vm-os'/> - <target dev='vda' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x08' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' 
-discard='unmap'/> - <source file='/vms/tempp/vm-data2'/> - <target dev='vdb' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x06' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data3'/> - <target dev='vdc' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x09' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data4'/> - <target dev='vdd' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data5'/> - <target dev='vde' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0c' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data6'/> - <target dev='vdf' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data7'/> - <target dev='vdg' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data8'/> - <target dev='vdh' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data9'/> - <target dev='vdi' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x10' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data10'/> - <target dev='vdj' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x11' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data11'/> - <target dev='vdk' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x12' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data12'/> - <target dev='vdl' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x13' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data13'/> - <target dev='vdm' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x14' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data14'/> - <target dev='vdn' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x15' 
-function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data15'/> - <target dev='vdo' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x16' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data16'/> - <target dev='vdp' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x17' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data17'/> - <target dev='vdq' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x18' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data18'/> - <target dev='vdr' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x19' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data19'/> - <target dev='vds' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data20'/> - <target dev='vdt' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data21'/> - <target dev='vdu' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data22'/> - <target dev='vdv' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data23'/> - <target dev='vdw' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x1e' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data24'/> - <target dev='vdx' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x01' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data25'/> - <target dev='vdy' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x03' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data26'/> - <target dev='vdz' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x04' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' 
-discard='unmap'/> - <source file='/vms/tempp/vm-data27'/> - <target dev='vdaa' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x05' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data28'/> - <target dev='vdab' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x06' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data29'/> - <target dev='vdac' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x07' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data30'/> - <target dev='vdad' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x08' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data31'/> - <target dev='vdae' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x09' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data32'/> - <target dev='vdaf' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data33'/> - <target dev='vdag' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data34'/> - <target dev='vdah' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data35'/> - <target dev='vdai' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data36'/> - <target dev='vdaj' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data37'/> - <target dev='vdak' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data38'/> - <target dev='vdal' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x10' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data39'/> - <target dev='vdam' bus='virtio'/> - <address type='pci' domain='0x0000' 
bus='0x01' slot='0x11' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data40'/> - <target dev='vdan' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x12' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data41'/> - <target dev='vdao' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x13' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data42'/> - <target dev='vdap' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x14' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data43'/> - <target dev='vdaq' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x15' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data44'/> - <target dev='vdar' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x16' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data45'/> - <target dev='vdas' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x17' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data46'/> - <target dev='vdat' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x18' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data47'/> - <target dev='vdau' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x19' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data48'/> - <target dev='vdav' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data49'/> - <target dev='vdaw' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data50'/> - <target dev='vdax' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data51'/> - <target dev='vday' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' 
cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data52'/> - <target dev='vdaz' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data53'/> - <target dev='vdba' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x01' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data54'/> - <target dev='vdbb' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x02' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data55'/> - <target dev='vdbc' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x03' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data56'/> - <target dev='vdbd' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x04' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data57'/> - <target dev='vdbe' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x05' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data58'/> - <target dev='vdbf' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x06' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data59'/> - <target dev='vdbg' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x07' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data60'/> - <target dev='vdbh' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x08' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data61'/> - <target dev='vdbi' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x09' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data62'/> - <target dev='vdbj' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data63'/> - <target dev='vdbk' bus='virtio'/> - <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' -function='0x0'/> - </disk> - <disk type='file' device='disk'> - <driver name='qemu' type='qcow2' cache='directsync' io='native' -discard='unmap'/> - <source file='/vms/tempp/vm-data1'/> - <target dev='vdbl' bus='virtio'/> - 
<address type='pci' domain='0x0000' bus='0x00' slot='0x03'
-function='0x0'/>
- </disk>
- <controller type='pci' index='0' model='pci-root'/>
- <controller type='pci' index='1' model='pci-bridge'>
- <model name='pci-bridge'/>
- <target chassisNr='1'/>
- <address type='pci' domain='0x0000' bus='0x00' slot='0x1f'
-function='0x0'/>
- </controller>
- <controller type='pci' index='2' model='pci-bridge'>
- <model name='pci-bridge'/>
- <target chassisNr='2'/>
- <address type='pci' domain='0x0000' bus='0x01' slot='0x1f'
-function='0x0'/>
- </controller>
- </devices>
-
-> > (3) Migrate the VM and the VM's disks
->
-> What do you mean by 'and vm disks' - are you doing a block migration?
-
-Yes, block migration.
-In fact, migrating only the domain (without block migration) also reproduces
-the crash.
-
-> Dave
-> --
-> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
-
diff --git a/classification_output/03/KVM/80615920 b/classification_output/03/KVM/80615920
deleted file mode 100644
index f6b9b5c9c..000000000
--- a/classification_output/03/KVM/80615920
+++ /dev/null
@@ -1,351 +0,0 @@
-KVM: 0.803
-mistranslation: 0.800
-other: 0.786
-instruction: 0.751
-boot: 0.750
-semantic: 0.737
-network: 0.732
-
-[BUG] accel/tcg: cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu)
-
-It seems there is a bug in SIGALRM handling when a 486 system emulates x86_64
-code.
-
-This code:
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <signal.h>
-#include <unistd.h>
-
-pthread_t thread1, thread2;
-
-// Signal handler for SIGALRM
-void alarm_handler(int sig) {
- // Do nothing, just wake up the other thread
-}
-
-// Thread 1 function
-void* thread1_func(void* arg) {
- // Set up the signal handler for SIGALRM
- signal(SIGALRM, alarm_handler);
-
- // Wait for 1 second
- sleep(1);
-
- // Send SIGALRM signal to thread 2
- pthread_kill(thread2, SIGALRM);
-
- return NULL;
-}
-
-// Thread 2 function
-void* thread2_func(void* arg) {
- // Wait for the SIGALRM signal
- pause();
-
- printf("Thread 2 woke up!\n");
-
- return NULL;
-}
-
-int main() {
- // Create thread 1
- if (pthread_create(&thread1, NULL, thread1_func, NULL) != 0) {
- fprintf(stderr, "Failed to create thread 1\n");
- return 1;
- }
-
- // Create thread 2
- if (pthread_create(&thread2, NULL, thread2_func, NULL) != 0) {
- fprintf(stderr, "Failed to create thread 2\n");
- return 1;
- }
-
- // Wait for both threads to finish
- pthread_join(thread1, NULL);
- pthread_join(thread2, NULL);
-
- return 0;
-}
-
-It fails with this -strace log (there are also unsupported syscalls 334 and
-435, but it seems they don't affect the code much):
-
-...
-736 rt_sigaction(SIGALRM,0x000000001123ec20,0x000000001123ecc0) = 0
-736 clock_nanosleep(CLOCK_REALTIME,0,{tv_sec = 1,tv_nsec = 0},{tv_sec = -1,tv_nsec = 0})
-736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x0000000010800b38,8) = 0
-736 Unknown syscall 435
-736 clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID| ...
-736 rt_sigprocmask(SIG_SETMASK,0x0000000010800b38,NULL,8)
-736 set_robust_list(0x11a419a0,0) = -1 errno=38 (Function not implemented)
-736 rt_sigprocmask(SIG_SETMASK,0x0000000011a41fb0,NULL,8) = 0
- = 0
-736 pause(0,0,2,277186368,0,295966400)
-736 futex(0x000000001123f990,FUTEX_CLOCK_REALTIME|FUTEX_WAIT_BITSET,738,NULL,NULL,0) = 0
-736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x000000001123ee88,8) = 0
-736 getpid() = 736
-736 tgkill(736,739,SIGALRM) = 0
- = -1 errno=4 (Interrupted system call)
---- SIGALRM {si_signo=SIGALRM, si_code=SI_TKILL, si_pid=736, si_uid=0} ---
-0x48874a != 0x3c69e10
-736 rt_sigprocmask(SIG_SETMASK,0x000000001123ee88,NULL,8) = 0
-**
-ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
-(cpu == current_cpu)
-Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
-failed: (cpu == current_cpu)
-0x48874a != 0x3c69e10
-**
-ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
-(cpu == current_cpu)
-Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
-failed: (cpu == current_cpu)
-#
-
-The code fails either with or without -singlestep; the command line:
-
-/usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin
-
-The source code of QEMU 8.1.1 was modified with the patch "[PATCH] qemu/timer:
-Don't use RDTSC on i486" [1], with a few added ioctls (not relevant), and
-cpu_exec_longjmp_cleanup() now prints the current pointers of
-cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
-
-config.log (built as a part of buildroot, basically the minimal possible
-configuration for running x86_64 on a 486):
-
-# Configured with:
-'/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/build/qemu-8.1.1/configure'
-'--prefix=/usr'
-'--cross-prefix=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/i486-buildroot-linux-gnu-'
-'--audio-drv-list='
-'--python=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/python3'
-'--ninja=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/ninja'
-'--disable-alsa' '--disable-bpf' '--disable-brlapi' '--disable-bsd-user'
-'--disable-cap-ng' '--disable-capstone' '--disable-containers'
-'--disable-coreaudio' '--disable-curl' '--disable-curses'
-'--disable-dbus-display' '--disable-docs' '--disable-dsound' '--disable-hvf'
-'--disable-jack' '--disable-libiscsi' '--disable-linux-aio'
-'--disable-linux-io-uring' '--disable-malloc-trim' '--disable-membarrier'
-'--disable-mpath' '--disable-netmap' '--disable-opengl' '--disable-oss'
-'--disable-pa' '--disable-rbd' '--disable-sanitizers' '--disable-selinux'
-'--disable-sparse' '--disable-strip' '--disable-vde' '--disable-vhost-crypto'
-'--disable-vhost-user-blk-server' '--disable-virtfs' '--disable-whpx'
-'--disable-xen' '--disable-attr' '--disable-kvm' '--disable-vhost-net'
-'--disable-download' '--disable-hexagon-idef-parser' '--disable-system'
-'--enable-linux-user' '--target-list=x86_64-linux-user' '--disable-vhost-user'
-'--disable-slirp' '--disable-sdl' '--disable-fdt' '--enable-trace-backends=nop'
-'--disable-tools' '--disable-guest-agent' '--disable-fuse'
-'--disable-fuse-lseek' '--disable-seccomp' '--disable-libssh'
-'--disable-libusb' '--disable-vnc' '--disable-nettle' '--disable-numa'
-'--disable-pipewire' '--disable-spice' '--disable-usb-redir'
-'--disable-install-blobs'
-
-Emulation of the same x86_64 code with qemu 6.2.0 installed on another x86_64
-native machine works fine.
-
-[1]
-https://lists.nongnu.org/archive/html/qemu-devel/2023-11/msg05387.html
-Best regards,
-Petr
-
-On Sat, 25 Nov 2023 at 13:09, Petr Cvek <petrcvekcz@gmail.com> wrote:
-> It seems there is a bug in SIGALRM handling when a 486 system emulates
-> x86_64 code.
-
-A 486 host is pretty well out of support currently. Can you reproduce
-this on a less ancient host CPU type?
-
-> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
-> (cpu == current_cpu)
-> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
-> assertion failed: (cpu == current_cpu)
-> 0x48874a != 0x3c69e10
-> **
-> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
-> (cpu == current_cpu)
-> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
-> assertion failed: (cpu == current_cpu)
-
-What compiler version do you build QEMU with? That
-assert is there because we have seen some buggy compilers
-in the past which don't correctly preserve the variable
-value as the setjmp/longjmp spec requires them to.
-
-thanks
--- PMM
-
-On 27. 11. 23 at 10:37, Peter Maydell wrote:
-> On Sat, 25 Nov 2023 at 13:09, Petr Cvek <petrcvekcz@gmail.com> wrote:
-> > It seems there is a bug in SIGALRM handling when a 486 system emulates
-> > x86_64 code.
->
-> A 486 host is pretty well out of support currently. Can you reproduce
-> this on a less ancient host CPU type?
-
-It seems it only fails when the code is compiled for i486.
QEMU built with the
-same compiler with -march=i586 and above runs on the same physical hardware
-without a problem. All -march= variants were executed on a Ryzen 3600.
-
-> > ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
-> > failed: (cpu == current_cpu)
-> > Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
-> > assertion failed: (cpu == current_cpu)
-> > 0x48874a != 0x3c69e10
-> > **
-> > ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
-> > failed: (cpu == current_cpu)
-> > Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
-> > assertion failed: (cpu == current_cpu)
->
-> What compiler version do you build QEMU with? That
-> assert is there because we have seen some buggy compilers
-> in the past which don't correctly preserve the variable
-> value as the setjmp/longjmp spec requires them to.
-
-The i486 and i586+ code variants were compiled with GCC 13.2.0 (more exactly,
-the slackware64 current multilib distribution).
-
-The i486 binary which runs on the real 486 was also built with GCC 13.2.0,
-installed as part of the buildroot cross-compiler (a roughly two-week-old git
-snapshot).
-
-> thanks
-> -- PMM
-
-best regards,
-Petr
-
-On 11/25/23 07:08, Petr Cvek wrote:
-> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed:
-> (cpu == current_cpu)
-> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
-> assertion failed: (cpu == current_cpu)
-> #
->
-> The code fails either with or without -singlestep; the command line:
->
-> /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin
->
-> The source code of QEMU 8.1.1 was modified with the patch "[PATCH]
-> qemu/timer: Don't use RDTSC on i486" [1], with a few added ioctls (not
-> relevant), and cpu_exec_longjmp_cleanup() now prints the current pointers
-> of cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
-
-If you try this again with 8.2-rc2, you should not see an assertion failure.
-You should see instead
-
-QEMU internal SIGILL {code=ILLOPC, addr=0x12345678}
-
-which I think more accurately summarizes the situation of attempting RDTSC on
-hardware that does not support it.
-
-r~
-
-On 29. 11. 23 at 15:25, Richard Henderson wrote:
-> On 11/25/23 07:08, Petr Cvek wrote:
-> > ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion
-> > failed: (cpu == current_cpu)
-> > Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup:
-> > assertion failed: (cpu == current_cpu)
-> > #
-> >
-> > The code fails either with or without -singlestep; the command line:
-> >
-> > /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep
-> > /opt/x86_64/alarm.bin
-> >
-> > The source code of QEMU 8.1.1 was modified with the patch "[PATCH]
-> > qemu/timer: Don't use RDTSC on i486" [1], with a few added ioctls (not
-> > relevant), and cpu_exec_longjmp_cleanup() now prints the current pointers
-> > of cpu and current_cpu (the "0x48874a != 0x3c69e10" line).
->
-> If you try this again with 8.2-rc2, you should not see an assertion failure.
-> You should see instead
->
-> QEMU internal SIGILL {code=ILLOPC, addr=0x12345678}
->
-> which I think more accurately summarizes the situation of attempting RDTSC
-> on hardware that does not support it.
-
-Compilation of vanilla qemu v8.2.0-rc2 with -march=i486 by GCC 13.2.0, running
-the resulting binary on a Ryzen, still leads to:
-
-**
-ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion failed:
-(cpu == current_cpu)
-Bail out!
ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion
-failed: (cpu == current_cpu)
-Aborted
-
-> r~
-
-Petr
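-
-For reference, the C rule Peter alludes to: an automatic variable that is
-modified between setjmp() and longjmp() has an indeterminate value after the
-jump unless it is declared volatile (C11 7.13.2.1), because the compiler may
-cache it in a register that longjmp() restores. A small, runnable,
-QEMU-independent sketch of the pattern that assert guards against:
-
-#include <setjmp.h>
-#include <stdio.h>
-
-static jmp_buf env;
-
-int main(void)
-{
-    /* 'volatile' forces the value to live in memory, so it survives
-     * the longjmp(); a plain local may be cached in a callee-saved
-     * register whose pre-setjmp value longjmp() restores. */
-    volatile int safe = 0;
-    int unsafe = 0;
-
-    if (setjmp(env) == 0) {
-        safe = 1;
-        unsafe = 1;
-        longjmp(env, 1);
-    }
-
-    /* 'safe' is guaranteed to be 1 here; 'unsafe' is indeterminate.
-     * A compiler that mishandles even the variables the spec does
-     * protect is the kind of bug the cpu == current_cpu assert in
-     * cpu_exec_longjmp_cleanup() is meant to catch early. */
-    printf("safe=%d unsafe=%d\n", safe, unsafe);
-    return 0;
-}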