Diffstat (limited to 'results/classifier/014/none')
-rw-r--r--   results/classifier/014/none/11933524   1152
-rw-r--r--   results/classifier/014/none/16228234   1871
-rw-r--r--   results/classifier/014/none/21247035   1348
-rw-r--r--   results/classifier/014/none/33802194   4966
-rw-r--r--   results/classifier/014/none/42613410    176
-rw-r--r--   results/classifier/014/none/43643137    565
-rw-r--r--   results/classifier/014/none/55367348    559
-rw-r--r--   results/classifier/014/none/55753058    320
-rw-r--r--   results/classifier/014/none/56309929    207
-rw-r--r--   results/classifier/014/none/65781993   2820
-rw-r--r--   results/classifier/014/none/68897003    743
-rw-r--r--   results/classifier/014/none/70868267     67
-rw-r--r--   results/classifier/014/none/71456293   1513
13 files changed, 16307 insertions, 0 deletions
diff --git a/results/classifier/014/none/11933524 b/results/classifier/014/none/11933524 new file mode 100644 index 00000000..784230bd --- /dev/null +++ b/results/classifier/014/none/11933524 @@ -0,0 +1,1152 @@ +VMM: 0.795 +PID: 0.791 +register: 0.784 +operating system: 0.781 +risc-v: 0.776 +ppc: 0.765 +device: 0.762 +assembly: 0.754 +permissions: 0.752 +debug: 0.752 +socket: 0.751 +architecture: 0.745 +boot: 0.743 +graphic: 0.737 +performance: 0.736 +hypervisor: 0.730 +kernel: 0.727 +mistranslation: 0.719 +user-level: 0.702 +TCG: 0.696 +vnc: 0.695 +peripherals: 0.690 +KVM: 0.689 +arm: 0.689 +i386: 0.682 +virtual: 0.676 +semantic: 0.673 +network: 0.662 +files: 0.660 +x86: 0.610 +alpha: 0.591 + +[BUG] hw/i386/pc.c: CXL Fixed Memory Window should not reserve e820 in bios + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. + +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. + +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. 
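To see the failure mode in miniature, here is a small self-contained C model of the resource tree described above. It is illustrative only: struct res and the loop below are simplified stand-ins, not the kernel's struct resource or its __region_intersects() implementation. The point is that once the conflict path adopts the earlier e820 entry as a child of the new window, any free-range probe of the window finds a busy sub-range.

#include <inttypes.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's resource tree (illustrative only). */
struct res {
    uint64_t start, end;
    const char *name;
    struct res *child;
};

int main(void)
{
    /* e820__reserve_resources_late() inserts the firmware entry first. */
    struct res reserved = { 0x290000000, 0x2cfffffff, "Reserved", NULL };

    /* cxl_parse_cfmws() later inserts the same range; the conflict path
     * keeps the old entry and makes it a child of the new window. */
    struct res window = { 0x290000000, 0x2cfffffff, "CXL Window 0", &reserved };

    /* What get_free_mem_region() effectively asks: does any busy child
     * overlap the window?  With the Reserved child covering the whole
     * window, the answer is always yes. */
    int intersects = 0;
    for (struct res *c = window.child; c; c = c->child) {
        if (window.start <= c->end && window.end >= c->start) {
            intersects = 1;
        }
    }

    printf("%" PRIx64 "-%" PRIx64 " : %s\n", window.start, window.end, window.name);
    printf("  %" PRIx64 "-%" PRIx64 " : %s\n", reserved.start, reserved.end, reserved.name);
    printf("-> %s\n", intersects ? "REGION_INTERSECTS" : "REGION_DISJOINT");
    return 0;
}

Compiled and run, this prints the same shape as the /proc/iomem excerpt above and always reports an intersection.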
+ +Signed-off-by: Gregory Price <gregory.price@memverge.com> +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; + + cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } +-- +2.37.3 + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. + +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. + +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. 
+ +Signed-off-by: Gregory Price <gregory.price@memverge.com> +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; +cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +Or will this be subregion of cxl_base? + +Thanks, +Pankaj +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + +> +> - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> memory_region_add_subregion(system_memory, cxl_base, mr); +> +> cxl_resv_end = cxl_base + cxl_size; +> +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> memory_region_init_io(&fw->mr, OBJECT(machine), +> +> &cfmws_ops, fw, +> +> "cxl-fixed-memory-region", +> +> fw->size); +> +> memory_region_add_subregion(system_memory, fw->base, +> +> &fw->mr); +> +> +Or will this be subregion of cxl_base? +> +> +Thanks, +> +Pankaj +The memory region backing this memory area still has to be initialized +and added in the QEMU system, but it will now be initialized for use by +linux after PCI/ACPI setup occurs and the CXL driver discovers it via +CDAT. + +It's also still possible to assign this area a static memory region at +bool by setting up the SRATs in the ACPI tables, but that patch is not +upstream yet. + +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price <gourry.memverge@gmail.com> wrote: +> +> +Early-boot e820 records will be inserted by the bios/efi/early boot +> +software and be reported to the kernel via insert_resource. Later, when +> +CXL drivers iterate through the regions again, they will insert another +> +resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? + +> +> +This RESERVED memory area causes the memory region to become unusable, +> +and as a result attempting to create memory regions with +> +> +`cxl create-region ...` +> +> +Will fail due to the RESERVED area intersecting with the CXL window. +> +> +> +During boot the following traceback is observed: +> +> +0xffffffff81101650 in insert_resource_expand_to_fit () +> +0xffffffff83d964c5 in e820__reserve_resources_late () +> +0xffffffff83e03210 in pcibios_resource_survey () +> +0xffffffff83e04f4a in pcibios_init () +> +> +Which produces a call to reserve the CFMWS area: +> +> +(gdb) p *new +> +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +Later the Kernel parses ACPI tables and reserves the exact same area as +> +the CXL Fixed Memory Window. 
The use of `insert_resource_conflict` +> +retains the RESERVED region and makes it a child of the new region. +> +> +0xffffffff811016a4 in insert_resource_conflict () +> +insert_resource () +> +0xffffffff81a81389 in cxl_parse_cfmws () +> +0xffffffff818c4a81 in call_handler () +> +acpi_parse_entries_array () +> +> +(gdb) p/x *new +> +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +This produces the following output in /proc/iomem: +> +> +590000000-68fffffff : CXL Window 0 +> +590000000-68fffffff : Reserved +> +> +This reserved area causes `get_free_mem_region()` to fail due to a check +> +against `__region_intersects()`. Due to this reserved area, the +> +intersect check will only ever return REGION_INTERSECTS, which causes +> +`cxl create-region` to always fail. +> +> +Signed-off-by: Gregory Price <gregory.price@memverge.com> +> +--- +> +hw/i386/pc.c | 2 -- +> +1 file changed, 2 deletions(-) +> +> +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +index 566accf7e6..5bf5465a21 100644 +> +--- a/hw/i386/pc.c +> ++++ b/hw/i386/pc.c +> +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +hwaddr cxl_size = MiB; +> +> +cxl_base = pc_get_cxl_range_start(pcms); +> +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +memory_region_add_subregion(system_memory, cxl_base, mr); +> +cxl_resv_end = cxl_base + cxl_size; +> +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +> +fw, +> +"cxl-fixed-memory-region", fw->size); +> +memory_region_add_subregion(system_memory, fw->base, +> +&fw->mr); +> +- e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +cxl_fmw_base += fw->size; +> +cxl_resv_end = cxl_fmw_base; +> +} +> +-- +> +2.37.3 +> + +This patch does not resolve the issue, reserved entries are still created. +[  0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +[  0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +# cat /proc/iomem +290000000-29fffffff : CXL Window 0 + 290000000-29fffffff : Reserved +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +cxl region: create_region: region0: set_size failed: Numerical result out of range +cxl region: cmd_create_region: created 0 regions +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha < +ani@anisinha.ca +> wrote: +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price < +gourry.memverge@gmail.com +> wrote: +> +> Early-boot e820 records will be inserted by the bios/efi/early boot +> software and be reported to the kernel via insert_resource. Later, when +> CXL drivers iterate through the regions again, they will insert another +> resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? +> +> This RESERVED memory area causes the memory region to become unusable, +> and as a result attempting to create memory regions with +> +>   `cxl create-region ...` +> +> Will fail due to the RESERVED area intersecting with the CXL window. 
+> +> +> During boot the following traceback is observed: +> +> 0xffffffff81101650 in insert_resource_expand_to_fit () +> 0xffffffff83d964c5 in e820__reserve_resources_late () +> 0xffffffff83e03210 in pcibios_resource_survey () +> 0xffffffff83e04f4a in pcibios_init () +> +> Which produces a call to reserve the CFMWS area: +> +> (gdb) p *new +> $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +>    flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +>    child = 0x0} +> +> Later the Kernel parses ACPI tables and reserves the exact same area as +> the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> retains the RESERVED region and makes it a child of the new region. +> +> 0xffffffff811016a4 in insert_resource_conflict () +>            insert_resource () +> 0xffffffff81a81389 in cxl_parse_cfmws () +> 0xffffffff818c4a81 in call_handler () +>            acpi_parse_entries_array () +> +> (gdb) p/x *new +> $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +>    flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +>    child = 0x0} +> +> This produces the following output in /proc/iomem: +> +> 590000000-68fffffff : CXL Window 0 +>  590000000-68fffffff : Reserved +> +> This reserved area causes `get_free_mem_region()` to fail due to a check +> against `__region_intersects()`. Due to this reserved area, the +> intersect check will only ever return REGION_INTERSECTS, which causes +> `cxl create-region` to always fail. +> +> Signed-off-by: Gregory Price < +gregory.price@memverge.com +> +> --- +> hw/i386/pc.c | 2 -- +> 1 file changed, 2 deletions(-) +> +> diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> index 566accf7e6..5bf5465a21 100644 +> --- a/hw/i386/pc.c +> +++ b/hw/i386/pc.c +> @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +>     hwaddr cxl_size = MiB; +> +>     cxl_base = pc_get_cxl_range_start(pcms); +> -    e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +>     memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +>     memory_region_add_subregion(system_memory, cxl_base, mr); +>     cxl_resv_end = cxl_base + cxl_size; +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +>         memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, +>                    "cxl-fixed-memory-region", fw->size); +>         memory_region_add_subregion(system_memory, fw->base, &fw->mr); +> -        e820_add_entry(fw->base, fw->size, E820_RESERVED); +>         cxl_fmw_base += fw->size; +>         cxl_resv_end = cxl_fmw_base; +>       } +> -- +> 2.37.3 +> + ++Gerd Hoffmann + +On Tue, Oct 18, 2022 at 8:16 PM Gregory Price <gourry.memverge@gmail.com> wrote: +> +> +This patch does not resolve the issue, reserved entries are still created. 
+> +> +[ 0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +> +[ 0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +> +> +# cat /proc/iomem +> +290000000-29fffffff : CXL Window 0 +> +290000000-29fffffff : Reserved +> +> +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +> +cxl region: create_region: region0: set_size failed: Numerical result out of +> +range +> +cxl region: cmd_create_region: created 0 regions +> +> +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha <ani@anisinha.ca> wrote: +> +> +> +> On Tue, Oct 18, 2022 at 5:14 AM Gregory Price <gourry.memverge@gmail.com> +> +> wrote: +> +> > +> +> > Early-boot e820 records will be inserted by the bios/efi/early boot +> +> > software and be reported to the kernel via insert_resource. Later, when +> +> > CXL drivers iterate through the regions again, they will insert another +> +> > resource and make the RESERVED memory area a child. +> +> +> +> I have already sent a patch +> +> +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +> +> When the patch is applied, there would not be any reserved entries +> +> even with passing E820_RESERVED . +> +> So this patch needs to be evaluated in the light of the above patch I +> +> sent. Once you apply my patch, does the issue still exist? +> +> +> +> > +> +> > This RESERVED memory area causes the memory region to become unusable, +> +> > and as a result attempting to create memory regions with +> +> > +> +> > `cxl create-region ...` +> +> > +> +> > Will fail due to the RESERVED area intersecting with the CXL window. +> +> > +> +> > +> +> > During boot the following traceback is observed: +> +> > +> +> > 0xffffffff81101650 in insert_resource_expand_to_fit () +> +> > 0xffffffff83d964c5 in e820__reserve_resources_late () +> +> > 0xffffffff83e03210 in pcibios_resource_survey () +> +> > 0xffffffff83e04f4a in pcibios_init () +> +> > +> +> > Which produces a call to reserve the CFMWS area: +> +> > +> +> > (gdb) p *new +> +> > $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +> > flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > Later the Kernel parses ACPI tables and reserves the exact same area as +> +> > the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> +> > retains the RESERVED region and makes it a child of the new region. +> +> > +> +> > 0xffffffff811016a4 in insert_resource_conflict () +> +> > insert_resource () +> +> > 0xffffffff81a81389 in cxl_parse_cfmws () +> +> > 0xffffffff818c4a81 in call_handler () +> +> > acpi_parse_entries_array () +> +> > +> +> > (gdb) p/x *new +> +> > $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +> > flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > This produces the following output in /proc/iomem: +> +> > +> +> > 590000000-68fffffff : CXL Window 0 +> +> > 590000000-68fffffff : Reserved +> +> > +> +> > This reserved area causes `get_free_mem_region()` to fail due to a check +> +> > against `__region_intersects()`. Due to this reserved area, the +> +> > intersect check will only ever return REGION_INTERSECTS, which causes +> +> > `cxl create-region` to always fail. 
+> +> > +> +> > Signed-off-by: Gregory Price <gregory.price@memverge.com> +> +> > --- +> +> > hw/i386/pc.c | 2 -- +> +> > 1 file changed, 2 deletions(-) +> +> > +> +> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > index 566accf7e6..5bf5465a21 100644 +> +> > --- a/hw/i386/pc.c +> +> > +++ b/hw/i386/pc.c +> +> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > hwaddr cxl_size = MiB; +> +> > +> +> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> > memory_region_add_subregion(system_memory, cxl_base, mr); +> +> > cxl_resv_end = cxl_base + cxl_size; +> +> > @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > memory_region_init_io(&fw->mr, OBJECT(machine), +> +> > &cfmws_ops, fw, +> +> > "cxl-fixed-memory-region", +> +> > fw->size); +> +> > memory_region_add_subregion(system_memory, fw->base, +> +> > &fw->mr); +> +> > - e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +> > cxl_fmw_base += fw->size; +> +> > cxl_resv_end = cxl_fmw_base; +> +> > } +> +> > -- +> +> > 2.37.3 +> +> > + +> +>> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +>> > index 566accf7e6..5bf5465a21 100644 +> +>> > --- a/hw/i386/pc.c +> +>> > +++ b/hw/i386/pc.c +> +>> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +>> > hwaddr cxl_size = MiB; +> +>> > +> +>> > cxl_base = pc_get_cxl_range_start(pcms); +> +>> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +Just dropping it doesn't look like a good plan to me. + +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +window is placed above that address, i.e. this effectively reserves +address space. Right now used by memory hotplug code, but should work +for cxl too I think (disclaimer: don't know much about cxl ...). + +take care & HTH, + Gerd + +On Tue, 8 Nov 2022 12:21:11 +0100 +Gerd Hoffmann <kraxel@redhat.com> wrote: + +> +> >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> >> > index 566accf7e6..5bf5465a21 100644 +> +> >> > --- a/hw/i386/pc.c +> +> >> > +++ b/hw/i386/pc.c +> +> >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> >> > hwaddr cxl_size = MiB; +> +> >> > +> +> >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +Just dropping it doesn't look like a good plan to me. +> +> +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +window is placed above that address, i.e. this effectively reserves +> +address space. Right now used by memory hotplug code, but should work +> +for cxl too I think (disclaimer: don't know much about cxl ...). +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +at all, it' has its own mapping. + +Regardless of that, reserved E820 entries look wrong, and looking at +commit message OS is right to bailout on them (expected according +to ACPI spec). +Also spec says + +" +E820 Assumptions and Limitations + [...] + The platform boot firmware does not return a range description for the memory +mapping of + PCI devices, ISA Option ROMs, and ISA Plug and Play cards because the OS has +mechanisms + available to detect them. +" + +so dropping reserved entries looks reasonable from ACPI spec point of view. +(disclaimer: don't know much about cxl ... 
either) +> +> +take care & HTH, +> +Gerd +> + +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +On Tue, 8 Nov 2022 12:21:11 +0100 +> +Gerd Hoffmann <kraxel@redhat.com> wrote: +> +> +> > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > >> > index 566accf7e6..5bf5465a21 100644 +> +> > >> > --- a/hw/i386/pc.c +> +> > >> > +++ b/hw/i386/pc.c +> +> > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > >> > hwaddr cxl_size = MiB; +> +> > >> > +> +> > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +> +> Just dropping it doesn't look like a good plan to me. +> +> +> +> You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> window is placed above that address, i.e. this effectively reserves +> +> address space. Right now used by memory hotplug code, but should work +> +> for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +at all, it' has its own mapping. +This should be changed. cxl should make sure the highest address used +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +resources there. + +> +so dropping reserved entries looks reasonable from ACPI spec point of view. +Yep, I don't want dispute that. + +I suspect the reason for these entries to exist in the first place is to +inform the firmware that it should not place stuff there, and if we +remove that to conform with the spec we need some alternative way for +that ... + +take care, + Gerd + +On Fri, 11 Nov 2022 12:40:59 +0100 +Gerd Hoffmann <kraxel@redhat.com> wrote: + +> +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +> On Tue, 8 Nov 2022 12:21:11 +0100 +> +> Gerd Hoffmann <kraxel@redhat.com> wrote: +> +> +> +> > > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > > >> > index 566accf7e6..5bf5465a21 100644 +> +> > > >> > --- a/hw/i386/pc.c +> +> > > >> > +++ b/hw/i386/pc.c +> +> > > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > > >> > hwaddr cxl_size = MiB; +> +> > > >> > +> +> > > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > +> +> > Just dropping it doesn't look like a good plan to me. +> +> > +> +> > You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> > (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> > window is placed above that address, i.e. this effectively reserves +> +> > address space. Right now used by memory hotplug code, but should work +> +> > for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +> +> As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +> at all, it' has its own mapping. +> +> +This should be changed. cxl should make sure the highest address used +> +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +> +resources there. +if (pcmc->has_reserved_memory && machine->device_memory->base) { + +[...] 
+ + if (pcms->cxl_devices_state.is_enabled) { + + res_mem_end = cxl_resv_end; + +that should be handled by this line + + } + + *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); + + fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); + + } + +so SeaBIOS shouldn't intrude into CXL address space +(I assume EDK2 behave similarly here) + +> +> so dropping reserved entries looks reasonable from ACPI spec point of view. +> +> +> +> +Yep, I don't want dispute that. +> +> +I suspect the reason for these entries to exist in the first place is to +> +inform the firmware that it should not place stuff there, and if we +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +just to educate me, can you point out what SeaBIOS code does with reservations. + +> +remove that to conform with the spec we need some alternative way for +> +that ... +with etc/reserved-memory-end set as above, +is E820_RESERVED really needed here? + +(my understanding was that E820_RESERVED weren't accounted for when +initializing PCI devices) + +> +> +take care, +> +Gerd +> + +> +if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +[...] +> +> +if (pcms->cxl_devices_state.is_enabled) { +> +> +res_mem_end = cxl_resv_end; +> +> +that should be handled by this line +> +> +} +> +> +*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +sizeof(*val)); +> +} +> +> +so SeaBIOS shouldn't intrude into CXL address space +Yes, looks good, so with this in place already everyting should be fine. + +> +(I assume EDK2 behave similarly here) +Correct, ovmf reads that fw_cfg file too. + +> +> I suspect the reason for these entries to exist in the first place is to +> +> inform the firmware that it should not place stuff there, and if we +> +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +just to educate me, can you point out what SeaBIOS code does with +> +reservations. +They are added to the e820 map which gets passed on to the OS. seabios +uses (and updateas) the e820 map too, when allocating memory for +example. While thinking about it I'm not fully sure it actually looks +at reservations, maybe it only uses (and updates) ram entries when +allocating memory. + +> +> remove that to conform with the spec we need some alternative way for +> +> that ... +> +> +with etc/reserved-memory-end set as above, +> +is E820_RESERVED really needed here? +No. Setting etc/reserved-memory-end is enough. + +So for the original patch: +Acked-by: Gerd Hoffmann <kraxel@redhat.com> + +take care, + Gerd + +On Fri, Nov 11, 2022 at 02:36:02PM +0100, Gerd Hoffmann wrote: +> +> if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +> +> [...] +> +> +> +> if (pcms->cxl_devices_state.is_enabled) { +> +> +> +> res_mem_end = cxl_resv_end; +> +> +> +> that should be handled by this line +> +> +> +> } +> +> +> +> *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +> +> fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +> sizeof(*val)); +> +> } +> +> +> +> so SeaBIOS shouldn't intrude into CXL address space +> +> +Yes, looks good, so with this in place already everyting should be fine. +> +> +> (I assume EDK2 behave similarly here) +> +> +Correct, ovmf reads that fw_cfg file too. 
+> +> +> > I suspect the reason for these entries to exist in the first place is to +> +> > inform the firmware that it should not place stuff there, and if we +> +> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +> just to educate me, can you point out what SeaBIOS code does with +> +> reservations. +> +> +They are added to the e820 map which gets passed on to the OS. seabios +> +uses (and updateas) the e820 map too, when allocating memory for +> +example. While thinking about it I'm not fully sure it actually looks +> +at reservations, maybe it only uses (and updates) ram entries when +> +allocating memory. +> +> +> > remove that to conform with the spec we need some alternative way for +> +> > that ... +> +> +> +> with etc/reserved-memory-end set as above, +> +> is E820_RESERVED really needed here? +> +> +No. Setting etc/reserved-memory-end is enough. +> +> +So for the original patch: +> +Acked-by: Gerd Hoffmann <kraxel@redhat.com> +> +> +take care, +> +Gerd +It's upstream already, sorry I can't add your tag. + +-- +MST + diff --git a/results/classifier/014/none/16228234 b/results/classifier/014/none/16228234 new file mode 100644 index 00000000..8a6f058a --- /dev/null +++ b/results/classifier/014/none/16228234 @@ -0,0 +1,1871 @@ +risc-v: 0.649 +user-level: 0.531 +mistranslation: 0.518 +ppc: 0.483 +operating system: 0.460 +KVM: 0.445 +VMM: 0.443 +network: 0.440 +permissions: 0.439 +device: 0.439 +register: 0.438 +TCG: 0.437 +hypervisor: 0.436 +arm: 0.435 +assembly: 0.435 +virtual: 0.421 +x86: 0.421 +vnc: 0.420 +peripherals: 0.411 +semantic: 0.411 +architecture: 0.410 +performance: 0.409 +graphic: 0.408 +i386: 0.404 +boot: 0.402 +socket: 0.401 +files: 0.394 +kernel: 0.393 +PID: 0.385 +debug: 0.384 +alpha: 0.379 + +[Qemu-devel] [Bug?] BQL about live migration + +Hello Juan & Dave, + +We hit a bug in our test: +Network error occurs when migrating a guest, libvirt then rollback the +migration, causes qemu coredump +qemu log: +2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": "STOP"} +2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: + qmp_cmd_name: migrate_cancel +2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +"MIGRATION", "data": {"status": "cancelling"}} +2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: + qmp_cmd_name: cont +2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-balloon device status is 7 that means DRIVER OK +2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-net device status is 7 that means DRIVER OK +2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-blk device status is 7 that means DRIVER OK +2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-serial device status is 7 that means DRIVER OK +2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +vm_state-notify:3ms +2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +"RESUME"} +2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: + this iteration cycle takes 3s, new dirtied data:0MB 
+2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +"MIGRATION_PASS", "data": {"pass": 3}} +2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +(131583/18446744073709551615) +qemu-kvm: /home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +virtio_net_save: Assertion `!n->vhost_started' failed. +2017-03-01 12:54:43.028: shutting down + +> +From qemu log, qemu received and processed migrate_cancel/cont qmp commands +after guest been stopped and entered the last round of migration. Then +migration thread try to save device state when guest is running(started by +cont command), causes assert and coredump. +This is because in last iter, we call cpu_synchronize_all_states() to +synchronize vcpu states, this call will release qemu_global_mutex and wait +for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +(gdb) bt +#0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +/lib64/libpthread.so.0 +#1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 <qemu_work_cond>, +mutex=0x7f764445eba0 <qemu_global_mutex>) at util/qemu-thread-posix.c:132 +#2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, func=0x7f7643a46413 +<do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +/mnt/public/yanghy/qemu-kvm/cpus.c:995 +#3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +/mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +#4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +/mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +#5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +/mnt/public/yanghy/qemu-kvm/cpus.c:766 +#6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy (f=0x7f76462f2d30, +iterable_only=false) at /mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +#7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +<current_migration.37571>, current_active_state=4, +old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +migration/migration.c:1753 +#8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 +<current_migration.37571>) at migration/migration.c:1922 +#9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0 +#10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6 +(gdb) p iothread_locked +$1 = true + +and then, qemu main thread been executed, it won't block because migration +thread released the qemu_global_mutex: +(gdb) thr 1 +[Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))] +#0 os_host_main_loop_wait (timeout=931565) at main-loop.c:270 +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout %d\n", +timeout); +(gdb) p iothread_locked +$2 = true +(gdb) l 268 +263 +264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, +timeout); +265 +266 +267 if (timeout) { +268 qemu_mutex_lock_iothread(); +269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout %d\n", +timeout); +271 } +272 } +(gdb) + +So, although we've hold iothread_lock in stop© phase of migration, we +can't guarantee the iothread been locked all through the stop & copy phase, +any thoughts on how to solve this problem? 
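The window described here can be reproduced with a few lines of pthreads. This is a standalone sketch, not QEMU code: "bql" and the thread bodies are illustrative stand-ins for qemu_global_mutex, a vCPU servicing a run_on_cpu() work item, and the main loop handling 'cont'. pthread_cond_wait() atomically releases the mutex while it sleeps, so a run_on_cpu()-style wait necessarily lets another thread take the lock mid-phase:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t bql = PTHREAD_MUTEX_INITIALIZER; /* stand-in for qemu_global_mutex */
static pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
static int work_done;

/* Stand-in for the vCPU thread running do_kvm_cpu_synchronize_state(). */
static void *vcpu_thread(void *arg)
{
    (void)arg;
    sleep(1);                      /* the vCPU gets to the work item late */
    pthread_mutex_lock(&bql);
    work_done = 1;
    pthread_cond_signal(&qemu_work_cond);
    pthread_mutex_unlock(&bql);
    return NULL;
}

/* Stand-in for the main loop processing a 'cont' command. */
static void *main_loop_thread(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&bql);      /* succeeds while the migration thread waits */
    printf("main loop: got the BQL during run_on_cpu; 'cont' can restart the guest\n");
    pthread_mutex_unlock(&bql);
    return NULL;
}

int main(void)
{
    pthread_t vcpu, loop;

    pthread_mutex_lock(&bql);      /* migration thread holds the BQL for stop & copy */
    pthread_create(&vcpu, NULL, vcpu_thread, NULL);
    pthread_create(&loop, NULL, main_loop_thread, NULL);

    /* run_on_cpu(): waiting on the condvar releases the BQL until signalled. */
    while (!work_done) {
        pthread_cond_wait(&qemu_work_cond, &bql);
    }
    printf("migration thread: back from run_on_cpu; guest state may have changed\n");
    pthread_mutex_unlock(&bql);

    pthread_join(vcpu, NULL);
    pthread_join(loop, NULL);
    return 0;
}

Built with -lpthread, the "main loop" line always prints while the "migration thread" still believes it is inside its locked section, which is exactly the gap the 'cont' command slipped through.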
+ + +Thanks, +-Gonglei + +On Fri, 03/03 09:29, Gonglei (Arei) wrote: +> +Hello Juan & Dave, +> +> +We hit a bug in our test: +> +Network error occurs when migrating a guest, libvirt then rollback the +> +migration, causes qemu coredump +> +qemu log: +> +2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": +> +"STOP"} +> +2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: migrate_cancel +> +2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +> +"MIGRATION", "data": {"status": "cancelling"}} +> +2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: cont +> +2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-balloon device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-net device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-blk device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-serial device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +> +vm_state-notify:3ms +> +2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +> +"RESUME"} +> +2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: +> +this iteration cycle takes 3s, new dirtied data:0MB +> +2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +> +"MIGRATION_PASS", "data": {"pass": 3}} +> +2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +> +(131583/18446744073709551615) +> +qemu-kvm: +> +/home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +> +virtio_net_save: Assertion `!n->vhost_started' failed. +> +2017-03-01 12:54:43.028: shutting down +> +> +From qemu log, qemu received and processed migrate_cancel/cont qmp commands +> +after guest been stopped and entered the last round of migration. Then +> +migration thread try to save device state when guest is running(started by +> +cont command), causes assert and coredump. 
+> +This is because in last iter, we call cpu_synchronize_all_states() to +> +synchronize vcpu states, this call will release qemu_global_mutex and wait +> +for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +> +(gdb) bt +> +#0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +> +/lib64/libpthread.so.0 +> +#1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 +> +<qemu_work_cond>, mutex=0x7f764445eba0 <qemu_global_mutex>) at +> +util/qemu-thread-posix.c:132 +> +#2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, func=0x7f7643a46413 +> +<do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:995 +> +#3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +> +#4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +> +#5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:766 +> +#6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy +> +(f=0x7f76462f2d30, iterable_only=false) at +> +/mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +> +#7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +> +<current_migration.37571>, current_active_state=4, +> +old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +> +migration/migration.c:1753 +> +#8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 +> +<current_migration.37571>) at migration/migration.c:1922 +> +#9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0 +> +#10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6 +> +(gdb) p iothread_locked +> +$1 = true +> +> +and then, qemu main thread been executed, it won't block because migration +> +thread released the qemu_global_mutex: +> +(gdb) thr 1 +> +[Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))] +> +#0 os_host_main_loop_wait (timeout=931565) at main-loop.c:270 +> +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +%d\n", timeout); +> +(gdb) p iothread_locked +> +$2 = true +> +(gdb) l 268 +> +263 +> +264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, +> +timeout); +> +265 +> +266 +> +267 if (timeout) { +> +268 qemu_mutex_lock_iothread(); +> +269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { +> +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +%d\n", timeout); +> +271 } +> +272 } +> +(gdb) +> +> +So, although we've hold iothread_lock in stop© phase of migration, we +> +can't guarantee the iothread been locked all through the stop & copy phase, +> +any thoughts on how to solve this problem? +Could you post a backtrace of the assertion? 
+ +Fam + +On 2017/3/3 18:42, Fam Zheng wrote: +> +On Fri, 03/03 09:29, Gonglei (Arei) wrote: +> +> Hello Juan & Dave, +> +> +> +> We hit a bug in our test: +> +> Network error occurs when migrating a guest, libvirt then rollback the +> +> migration, causes qemu coredump +> +> qemu log: +> +> 2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": +> +> "STOP"} +> +> 2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +> qmp_cmd_name: migrate_cancel +> +> 2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +> +> "MIGRATION", "data": {"status": "cancelling"}} +> +> 2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +> qmp_cmd_name: cont +> +> 2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-balloon device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-net device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-blk device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-serial device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +> +> vm_state-notify:3ms +> +> 2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +> +> "RESUME"} +> +> 2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: +> +> this iteration cycle takes 3s, new dirtied data:0MB +> +> 2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +> +> "MIGRATION_PASS", "data": {"pass": 3}} +> +> 2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +> +> (131583/18446744073709551615) +> +> qemu-kvm: +> +> /home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +> +> virtio_net_save: Assertion `!n->vhost_started' failed. +> +> 2017-03-01 12:54:43.028: shutting down +> +> +> +> From qemu log, qemu received and processed migrate_cancel/cont qmp commands +> +> after guest been stopped and entered the last round of migration. Then +> +> migration thread try to save device state when guest is running(started by +> +> cont command), causes assert and coredump. 
+> +> This is because in last iter, we call cpu_synchronize_all_states() to +> +> synchronize vcpu states, this call will release qemu_global_mutex and wait +> +> for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +> +> (gdb) bt +> +> #0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +> +> /lib64/libpthread.so.0 +> +> #1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 +> +> <qemu_work_cond>, mutex=0x7f764445eba0 <qemu_global_mutex>) at +> +> util/qemu-thread-posix.c:132 +> +> #2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, +> +> func=0x7f7643a46413 <do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +> +> /mnt/public/yanghy/qemu-kvm/cpus.c:995 +> +> #3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +> /mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +> +> #4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +> /mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +> +> #5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +> +> /mnt/public/yanghy/qemu-kvm/cpus.c:766 +> +> #6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy +> +> (f=0x7f76462f2d30, iterable_only=false) at +> +> /mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +> +> #7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +> +> <current_migration.37571>, current_active_state=4, +> +> old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +> +> migration/migration.c:1753 +> +> #8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 +> +> <current_migration.37571>) at migration/migration.c:1922 +> +> #9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0 +> +> #10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6 +> +> (gdb) p iothread_locked +> +> $1 = true +> +> +> +> and then, qemu main thread been executed, it won't block because migration +> +> thread released the qemu_global_mutex: +> +> (gdb) thr 1 +> +> [Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))] +> +> #0 os_host_main_loop_wait (timeout=931565) at main-loop.c:270 +> +> 270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +> %d\n", timeout); +> +> (gdb) p iothread_locked +> +> $2 = true +> +> (gdb) l 268 +> +> 263 +> +> 264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, +> +> timeout); +> +> 265 +> +> 266 +> +> 267 if (timeout) { +> +> 268 qemu_mutex_lock_iothread(); +> +> 269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { +> +> 270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +> %d\n", timeout); +> +> 271 } +> +> 272 } +> +> (gdb) +> +> +> +> So, although we've hold iothread_lock in stop© phase of migration, we +> +> can't guarantee the iothread been locked all through the stop & copy phase, +> +> any thoughts on how to solve this problem? +> +> +Could you post a backtrace of the assertion? 
+#0 0x00007f97b1fbe5d7 in raise () from /usr/lib64/libc.so.6 +#1 0x00007f97b1fbfcc8 in abort () from /usr/lib64/libc.so.6 +#2 0x00007f97b1fb7546 in __assert_fail_base () from /usr/lib64/libc.so.6 +#3 0x00007f97b1fb75f2 in __assert_fail () from /usr/lib64/libc.so.6 +#4 0x000000000049fd19 in virtio_net_save (f=0x7f97a8ca44d0, +opaque=0x7f97a86e9018) at /usr/src/debug/qemu-kvm-2.6.0/hw/ +#5 0x000000000047e380 in vmstate_save_old_style (address@hidden, +address@hidden, se=0x7f9 +#6 0x000000000047fb93 in vmstate_save (address@hidden, address@hidden, +address@hidden +#7 0x0000000000481ad2 in qemu_savevm_state_complete_precopy (f=0x7f97a8ca44d0, +address@hidden) +#8 0x00000000006c6b60 in migration_completion (address@hidden +<current_migration.38312>, current_active_state=curre + address@hidden) at migration/migration.c:1761 +#9 0x00000000006c71db in migration_thread (address@hidden +<current_migration.38312>) at migration/migrati + +> +> +Fam +> +-- +Thanks, +Yang + +* Gonglei (Arei) (address@hidden) wrote: +> +Hello Juan & Dave, +cc'ing in pbonzini since it's magic involving cpu_synrhonize_all_states() + +> +We hit a bug in our test: +> +Network error occurs when migrating a guest, libvirt then rollback the +> +migration, causes qemu coredump +> +qemu log: +> +2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": +> +"STOP"} +> +2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: migrate_cancel +> +2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +> +"MIGRATION", "data": {"status": "cancelling"}} +> +2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: cont +> +2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-balloon device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-net device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-blk device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-serial device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +> +vm_state-notify:3ms +> +2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +> +"RESUME"} +> +2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: +> +this iteration cycle takes 3s, new dirtied data:0MB +> +2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +> +"MIGRATION_PASS", "data": {"pass": 3}} +> +2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +> +(131583/18446744073709551615) +> +qemu-kvm: +> +/home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +> +virtio_net_save: Assertion `!n->vhost_started' failed. 
+> +2017-03-01 12:54:43.028: shutting down +> +> +From qemu log, qemu received and processed migrate_cancel/cont qmp commands +> +after guest been stopped and entered the last round of migration. Then +> +migration thread try to save device state when guest is running(started by +> +cont command), causes assert and coredump. +> +This is because in last iter, we call cpu_synchronize_all_states() to +> +synchronize vcpu states, this call will release qemu_global_mutex and wait +> +for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +> +(gdb) bt +> +#0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +> +/lib64/libpthread.so.0 +> +#1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 +> +<qemu_work_cond>, mutex=0x7f764445eba0 <qemu_global_mutex>) at +> +util/qemu-thread-posix.c:132 +> +#2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, func=0x7f7643a46413 +> +<do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:995 +> +#3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +> +#4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +> +#5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:766 +> +#6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy +> +(f=0x7f76462f2d30, iterable_only=false) at +> +/mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +> +#7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +> +<current_migration.37571>, current_active_state=4, +> +old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +> +migration/migration.c:1753 +> +#8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 +> +<current_migration.37571>) at migration/migration.c:1922 +> +#9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0 +> +#10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6 +> +(gdb) p iothread_locked +> +$1 = true +> +> +and then, qemu main thread been executed, it won't block because migration +> +thread released the qemu_global_mutex: +> +(gdb) thr 1 +> +[Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))] +> +#0 os_host_main_loop_wait (timeout=931565) at main-loop.c:270 +> +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +%d\n", timeout); +> +(gdb) p iothread_locked +> +$2 = true +> +(gdb) l 268 +> +263 +> +264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, +> +timeout); +> +265 +> +266 +> +267 if (timeout) { +> +268 qemu_mutex_lock_iothread(); +> +269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { +> +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +%d\n", timeout); +> +271 } +> +272 } +> +(gdb) +> +> +So, although we've hold iothread_lock in stop© phase of migration, we +> +can't guarantee the iothread been locked all through the stop & copy phase, +> +any thoughts on how to solve this problem? +Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that +their were times when run_on_cpu would have to drop the BQL and I worried about +it, +but this is the 1st time I've seen an error due to it. + +Do you know what the migration state was at that point? Was it +MIGRATION_STATUS_CANCELLING? +I'm thinking perhaps we should stop 'cont' from continuing while migration is in +MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - so +that +perhaps libvirt could avoid sending the 'cont' until then? 
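One possible shape for that guard, as a sketch only: this is against roughly the 2.6-era qmp_cont(), the error text is made up, and the real function carries other runstate checks not shown here. It only illustrates the check being proposed, using the existing MIGRATION_STATUS_CANCELLING state:

void qmp_cont(Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* Refuse to restart the guest while a cancel is still draining;
     * the caller can retry once query-migrate (or the MIGRATION event,
     * with the 'events' capability) reports "cancelled". */
    if (s->state == MIGRATION_STATUS_CANCELLING) {
        error_setg(errp, "migration is being cancelled, cannot resume yet");
        return;
    }

    /* ... existing resume path ... */
}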
+ +Dave + + +> +> +Thanks, +> +-Gonglei +> +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that +> +their were times when run_on_cpu would have to drop the BQL and I worried +> +about it, +> +but this is the 1st time I've seen an error due to it. +> +> +Do you know what the migration state was at that point? Was it +> +MIGRATION_STATUS_CANCELLING? +> +I'm thinking perhaps we should stop 'cont' from continuing while migration is +> +in +> +MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - so +> +that +> +perhaps libvirt could avoid sending the 'cont' until then? +No, there's no event, though I thought libvirt would poll until +"query-migrate" returns the cancelled state. Of course that is a small +consolation, because a segfault is unacceptable. + +One possibility is to suspend the monitor in qmp_migrate_cancel and +resume it (with add_migration_state_change_notifier) when we hit the +CANCELLED state. I'm not sure what the latency would be between the end +of migrate_fd_cancel and finally reaching CANCELLED. + +Paolo + +* Paolo Bonzini (address@hidden) wrote: +> +> +> +On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that +> +> their were times when run_on_cpu would have to drop the BQL and I worried +> +> about it, +> +> but this is the 1st time I've seen an error due to it. +> +> +> +> Do you know what the migration state was at that point? Was it +> +> MIGRATION_STATUS_CANCELLING? +> +> I'm thinking perhaps we should stop 'cont' from continuing while migration +> +> is in +> +> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +> so that +> +> perhaps libvirt could avoid sending the 'cont' until then? +> +> +No, there's no event, though I thought libvirt would poll until +> +"query-migrate" returns the cancelled state. Of course that is a small +> +consolation, because a segfault is unacceptable. +I think you might get an event if you set the new migrate capability called +'events' on! + +void migrate_set_state(int *state, int old_state, int new_state) +{ + if (atomic_cmpxchg(state, old_state, new_state) == old_state) { + trace_migrate_set_state(new_state); + migrate_generate_event(new_state); + } +} + +static void migrate_generate_event(int new_state) +{ + if (migrate_use_events()) { + qapi_event_send_migration(new_state, &error_abort); + } +} + +That event feature went in sometime after 2.3.0. + +> +One possibility is to suspend the monitor in qmp_migrate_cancel and +> +resume it (with add_migration_state_change_notifier) when we hit the +> +CANCELLED state. I'm not sure what the latency would be between the end +> +of migrate_fd_cancel and finally reaching CANCELLED. +I don't like suspending monitors; it can potentially take quite a significant +time to do a cancel. +How about making 'cont' fail if we're in CANCELLING? + +I'd really love to see the 'run_on_cpu' being more careful about the BQL; +we really need all of the rest of the devices to stay quiesced at times. + +Dave + +> +Paolo +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +* Paolo Bonzini (address@hidden) wrote: +> +> +> +> +> +> On 03/03/2017 13:00, Dr. 
David Alan Gilbert wrote: +> +>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that +> +>> their were times when run_on_cpu would have to drop the BQL and I worried +> +>> about it, +> +>> but this is the 1st time I've seen an error due to it. +> +>> +> +>> Do you know what the migration state was at that point? Was it +> +>> MIGRATION_STATUS_CANCELLING? +> +>> I'm thinking perhaps we should stop 'cont' from continuing while migration +> +>> is in +> +>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +>> so that +> +>> perhaps libvirt could avoid sending the 'cont' until then? +> +> +> +> No, there's no event, though I thought libvirt would poll until +> +> "query-migrate" returns the cancelled state. Of course that is a small +> +> consolation, because a segfault is unacceptable. +> +> +I think you might get an event if you set the new migrate capability called +> +'events' on! +> +> +void migrate_set_state(int *state, int old_state, int new_state) +> +{ +> +if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +trace_migrate_set_state(new_state); +> +migrate_generate_event(new_state); +> +} +> +} +> +> +static void migrate_generate_event(int new_state) +> +{ +> +if (migrate_use_events()) { +> +qapi_event_send_migration(new_state, &error_abort); +> +} +> +} +> +> +That event feature went in sometime after 2.3.0. +> +> +> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +> resume it (with add_migration_state_change_notifier) when we hit the +> +> CANCELLED state. I'm not sure what the latency would be between the end +> +> of migrate_fd_cancel and finally reaching CANCELLED. +> +> +I don't like suspending monitors; it can potentially take quite a significant +> +time to do a cancel. +> +How about making 'cont' fail if we're in CANCELLING? +Actually I thought that would be the case already (in fact CANCELLING is +internal only; the outside world sees it as "active" in query-migrate). + +Lei, what is the runstate? (That is, why did cont succeed at all)? + +Paolo + +> +I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +we really need all of the rest of the devices to stay quiesced at times. +That's not really possible, because of how condition variables work. :( + +* Paolo Bonzini (address@hidden) wrote: +> +> +> +On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +> * Paolo Bonzini (address@hidden) wrote: +> +>> +> +>> +> +>> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago +> +>>> that +> +>>> their were times when run_on_cpu would have to drop the BQL and I worried +> +>>> about it, +> +>>> but this is the 1st time I've seen an error due to it. +> +>>> +> +>>> Do you know what the migration state was at that point? Was it +> +>>> MIGRATION_STATUS_CANCELLING? +> +>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +>>> migration is in +> +>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +>>> so that +> +>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>> +> +>> No, there's no event, though I thought libvirt would poll until +> +>> "query-migrate" returns the cancelled state. Of course that is a small +> +>> consolation, because a segfault is unacceptable. +> +> +> +> I think you might get an event if you set the new migrate capability called +> +> 'events' on! 
+> +> +> +> void migrate_set_state(int *state, int old_state, int new_state) +> +> { +> +> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +> trace_migrate_set_state(new_state); +> +> migrate_generate_event(new_state); +> +> } +> +> } +> +> +> +> static void migrate_generate_event(int new_state) +> +> { +> +> if (migrate_use_events()) { +> +> qapi_event_send_migration(new_state, &error_abort); +> +> } +> +> } +> +> +> +> That event feature went in sometime after 2.3.0. +> +> +> +>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>> resume it (with add_migration_state_change_notifier) when we hit the +> +>> CANCELLED state. I'm not sure what the latency would be between the end +> +>> of migrate_fd_cancel and finally reaching CANCELLED. +> +> +> +> I don't like suspending monitors; it can potentially take quite a +> +> significant +> +> time to do a cancel. +> +> How about making 'cont' fail if we're in CANCELLING? +> +> +Actually I thought that would be the case already (in fact CANCELLING is +> +internal only; the outside world sees it as "active" in query-migrate). +> +> +Lei, what is the runstate? (That is, why did cont succeed at all)? +I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the device +save, and that's what we get at the end of a migrate and it's legal to restart +from there. + +> +Paolo +> +> +> I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +> we really need all of the rest of the devices to stay quiesced at times. +> +> +That's not really possible, because of how condition variables work. :( +*Really* we need to find a solution to that - there's probably lots of +other things that can spring up in that small window other than the +'cont'. + +Dave + +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 03/03/2017 14:26, Dr. David Alan Gilbert wrote: +> +* Paolo Bonzini (address@hidden) wrote: +> +> +> +> +> +> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +>> * Paolo Bonzini (address@hidden) wrote: +> +>>> +> +>>> +> +>>> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago +> +>>>> that +> +>>>> their were times when run_on_cpu would have to drop the BQL and I worried +> +>>>> about it, +> +>>>> but this is the 1st time I've seen an error due to it. +> +>>>> +> +>>>> Do you know what the migration state was at that point? Was it +> +>>>> MIGRATION_STATUS_CANCELLING? +> +>>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +>>>> migration is in +> +>>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +>>>> so that +> +>>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>>> +> +>>> No, there's no event, though I thought libvirt would poll until +> +>>> "query-migrate" returns the cancelled state. Of course that is a small +> +>>> consolation, because a segfault is unacceptable. +> +>> +> +>> I think you might get an event if you set the new migrate capability called +> +>> 'events' on! 
+> +>> +> +>> void migrate_set_state(int *state, int old_state, int new_state) +> +>> { +> +>> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +>> trace_migrate_set_state(new_state); +> +>> migrate_generate_event(new_state); +> +>> } +> +>> } +> +>> +> +>> static void migrate_generate_event(int new_state) +> +>> { +> +>> if (migrate_use_events()) { +> +>> qapi_event_send_migration(new_state, &error_abort); +> +>> } +> +>> } +> +>> +> +>> That event feature went in sometime after 2.3.0. +> +>> +> +>>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>>> resume it (with add_migration_state_change_notifier) when we hit the +> +>>> CANCELLED state. I'm not sure what the latency would be between the end +> +>>> of migrate_fd_cancel and finally reaching CANCELLED. +> +>> +> +>> I don't like suspending monitors; it can potentially take quite a +> +>> significant +> +>> time to do a cancel. +> +>> How about making 'cont' fail if we're in CANCELLING? +> +> +> +> Actually I thought that would be the case already (in fact CANCELLING is +> +> internal only; the outside world sees it as "active" in query-migrate). +> +> +> +> Lei, what is the runstate? (That is, why did cont succeed at all)? +> +> +I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the device +> +save, and that's what we get at the end of a migrate and it's legal to restart +> +from there. +Yeah, but I think we get there at the end of a failed migrate only. So +perhaps we can introduce a new state RUN_STATE_FAILED_MIGRATE and forbid +"cont" from finish-migrate (only allow it from failed-migrate)? + +Paolo + +> +> Paolo +> +> +> +>> I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +>> we really need all of the rest of the devices to stay quiesced at times. +> +> +> +> That's not really possible, because of how condition variables work. :( +> +> +*Really* we need to find a solution to that - there's probably lots of +> +other things that can spring up in that small window other than the +> +'cont'. +> +> +Dave +> +> +-- +> +Dr. David Alan Gilbert / address@hidden / Manchester, UK +> + +Hi Paolo, + +On Fri, Mar 3, 2017 at 9:33 PM, Paolo Bonzini <address@hidden> wrote: + +> +> +> +On 03/03/2017 14:26, Dr. David Alan Gilbert wrote: +> +> * Paolo Bonzini (address@hidden) wrote: +> +>> +> +>> +> +>> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +>>> * Paolo Bonzini (address@hidden) wrote: +> +>>>> +> +>>>> +> +>>>> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>>>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while +> +ago that +> +>>>>> their were times when run_on_cpu would have to drop the BQL and I +> +worried about it, +> +>>>>> but this is the 1st time I've seen an error due to it. +> +>>>>> +> +>>>>> Do you know what the migration state was at that point? Was it +> +MIGRATION_STATUS_CANCELLING? +> +>>>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +migration is in +> +>>>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit +> +CANCELLED - so that +> +>>>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>>>> +> +>>>> No, there's no event, though I thought libvirt would poll until +> +>>>> "query-migrate" returns the cancelled state. Of course that is a +> +small +> +>>>> consolation, because a segfault is unacceptable. +> +>>> +> +>>> I think you might get an event if you set the new migrate capability +> +called +> +>>> 'events' on! 
+> +>>> +> +>>> void migrate_set_state(int *state, int old_state, int new_state) +> +>>> { +> +>>> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +>>> trace_migrate_set_state(new_state); +> +>>> migrate_generate_event(new_state); +> +>>> } +> +>>> } +> +>>> +> +>>> static void migrate_generate_event(int new_state) +> +>>> { +> +>>> if (migrate_use_events()) { +> +>>> qapi_event_send_migration(new_state, &error_abort); +> +>>> } +> +>>> } +> +>>> +> +>>> That event feature went in sometime after 2.3.0. +> +>>> +> +>>>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>>>> resume it (with add_migration_state_change_notifier) when we hit the +> +>>>> CANCELLED state. I'm not sure what the latency would be between the +> +end +> +>>>> of migrate_fd_cancel and finally reaching CANCELLED. +> +>>> +> +>>> I don't like suspending monitors; it can potentially take quite a +> +significant +> +>>> time to do a cancel. +> +>>> How about making 'cont' fail if we're in CANCELLING? +> +>> +> +>> Actually I thought that would be the case already (in fact CANCELLING is +> +>> internal only; the outside world sees it as "active" in query-migrate). +> +>> +> +>> Lei, what is the runstate? (That is, why did cont succeed at all)? +> +> +> +> I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the +> +device +> +> save, and that's what we get at the end of a migrate and it's legal to +> +restart +> +> from there. +> +> +Yeah, but I think we get there at the end of a failed migrate only. So +> +perhaps we can introduce a new state RUN_STATE_FAILED_MIGRATE +I think we do not need to introduce a new state here. If we hit 'cont' and +the run state is RUN_STATE_FINISH_MIGRATE, we could assume that +migration failed because 'RUN_STATE_FINISH_MIGRATE' only exists on +source side, means we are finishing migration, a 'cont' at the meantime +indicates that we are rolling back, otherwise source side should be +destroyed. + + +> +and forbid +> +"cont" from finish-migrate (only allow it from failed-migrate)? +> +The problem of forbid 'cont' here is that it will result in a failed +migration and the source +side will remain paused. We actually expect a usable guest when rollback. +Is there a way to kill migration thread when we're under main thread, if +there is, we +could do the following to solve this problem: +1. 'cont' received during runstate RUN_STATE_FINISH_MIGRATE +2. kill migration thread +3. vm_start() + +But this only solves 'cont' problem. As Dave said before, other things could +happen during the small windows while we are finishing migration, that's +what I was worried about... + + +> +Paolo +> +> +>> Paolo +> +>> +> +>>> I'd really love to see the 'run_on_cpu' being more careful about the +> +BQL; +> +>>> we really need all of the rest of the devices to stay quiesced at +> +times. +> +>> +> +>> That's not really possible, because of how condition variables work. :( +> +> +> +> *Really* we need to find a solution to that - there's probably lots of +> +> other things that can spring up in that small window other than the +> +> 'cont'. +> +> +> +> Dave +> +> +> +> -- +> +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> +> +> + +* Paolo Bonzini (address@hidden) wrote: +> +> +> +On 03/03/2017 14:26, Dr. David Alan Gilbert wrote: +> +> * Paolo Bonzini (address@hidden) wrote: +> +>> +> +>> +> +>> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +>>> * Paolo Bonzini (address@hidden) wrote: +> +>>>> +> +>>>> +> +>>>> On 03/03/2017 13:00, Dr. 
David Alan Gilbert wrote: +> +>>>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago +> +>>>>> that +> +>>>>> their were times when run_on_cpu would have to drop the BQL and I +> +>>>>> worried about it, +> +>>>>> but this is the 1st time I've seen an error due to it. +> +>>>>> +> +>>>>> Do you know what the migration state was at that point? Was it +> +>>>>> MIGRATION_STATUS_CANCELLING? +> +>>>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +>>>>> migration is in +> +>>>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED +> +>>>>> - so that +> +>>>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>>>> +> +>>>> No, there's no event, though I thought libvirt would poll until +> +>>>> "query-migrate" returns the cancelled state. Of course that is a small +> +>>>> consolation, because a segfault is unacceptable. +> +>>> +> +>>> I think you might get an event if you set the new migrate capability +> +>>> called +> +>>> 'events' on! +> +>>> +> +>>> void migrate_set_state(int *state, int old_state, int new_state) +> +>>> { +> +>>> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +>>> trace_migrate_set_state(new_state); +> +>>> migrate_generate_event(new_state); +> +>>> } +> +>>> } +> +>>> +> +>>> static void migrate_generate_event(int new_state) +> +>>> { +> +>>> if (migrate_use_events()) { +> +>>> qapi_event_send_migration(new_state, &error_abort); +> +>>> } +> +>>> } +> +>>> +> +>>> That event feature went in sometime after 2.3.0. +> +>>> +> +>>>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>>>> resume it (with add_migration_state_change_notifier) when we hit the +> +>>>> CANCELLED state. I'm not sure what the latency would be between the end +> +>>>> of migrate_fd_cancel and finally reaching CANCELLED. +> +>>> +> +>>> I don't like suspending monitors; it can potentially take quite a +> +>>> significant +> +>>> time to do a cancel. +> +>>> How about making 'cont' fail if we're in CANCELLING? +> +>> +> +>> Actually I thought that would be the case already (in fact CANCELLING is +> +>> internal only; the outside world sees it as "active" in query-migrate). +> +>> +> +>> Lei, what is the runstate? (That is, why did cont succeed at all)? +> +> +> +> I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the +> +> device +> +> save, and that's what we get at the end of a migrate and it's legal to +> +> restart +> +> from there. +> +> +Yeah, but I think we get there at the end of a failed migrate only. So +> +perhaps we can introduce a new state RUN_STATE_FAILED_MIGRATE and forbid +> +"cont" from finish-migrate (only allow it from failed-migrate)? +OK, I was wrong in my previous statement; we actually go +FINISH_MIGRATE->POSTMIGRATE +so no new state is needed; you shouldn't be restarting the cpu in +FINISH_MIGRATE. + +My preference is to get libvirt to wait for the transition to POSTMIGRATE before +it issues the 'cont'. I'd rather not block the monitor with 'cont' but I'm +not sure how we'd cleanly make cont fail without breaking existing libvirts +that usually don't hit this race. (cc'ing in Jiri). + +Dave + +> +Paolo +> +> +>> Paolo +> +>> +> +>>> I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +>>> we really need all of the rest of the devices to stay quiesced at times. +> +>> +> +>> That's not really possible, because of how condition variables work. 
:( +> +> +> +> *Really* we need to find a solution to that - there's probably lots of +> +> other things that can spring up in that small window other than the +> +> 'cont'. +> +> +> +> Dave +> +> +> +> -- +> +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +Hi Dave, + +On Fri, Mar 3, 2017 at 9:26 PM, Dr. David Alan Gilbert <address@hidden> +wrote: + +> +* Paolo Bonzini (address@hidden) wrote: +> +> +> +> +> +> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +> > * Paolo Bonzini (address@hidden) wrote: +> +> >> +> +> >> +> +> >> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +... +> +> > That event feature went in sometime after 2.3.0. +> +> > +> +> >> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +> >> resume it (with add_migration_state_change_notifier) when we hit the +> +> >> CANCELLED state. I'm not sure what the latency would be between the +> +end +> +> >> of migrate_fd_cancel and finally reaching CANCELLED. +> +> > +> +> > I don't like suspending monitors; it can potentially take quite a +> +significant +> +> > time to do a cancel. +> +> > How about making 'cont' fail if we're in CANCELLING? +> +> +> +> Actually I thought that would be the case already (in fact CANCELLING is +> +> internal only; the outside world sees it as "active" in query-migrate). +> +> +> +> Lei, what is the runstate? (That is, why did cont succeed at all)? +> +> +I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the +> +device +> +It is RUN_STATE_FINISH_MIGRATE. + + +> +save, and that's what we get at the end of a migrate and it's legal to +> +restart +> +from there. +> +> +> Paolo +> +> +> +> > I'd really love to see the 'run_on_cpu' being more careful about the +> +BQL; +> +> > we really need all of the rest of the devices to stay quiesced at +> +times. +> +> +> +> That's not really possible, because of how condition variables work. :( +> +> +*Really* we need to find a solution to that - there's probably lots of +> +other things that can spring up in that small window other than the +> +'cont'. +> +This is what I was worry about. Not only sync_cpu_state() will call +run_on_cpu() +but also vm_stop_force_state() will, both of them did hit the small windows +in our +test. + + +> +> +Dave +> +> +-- +> +Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> + diff --git a/results/classifier/014/none/21247035 b/results/classifier/014/none/21247035 new file mode 100644 index 00000000..46fc3644 --- /dev/null +++ b/results/classifier/014/none/21247035 @@ -0,0 +1,1348 @@ +mistranslation: 0.584 +user-level: 0.570 +permissions: 0.541 +register: 0.528 +device: 0.525 +KVM: 0.514 +TCG: 0.490 +ppc: 0.472 +debug: 0.468 +operating system: 0.456 +virtual: 0.449 +VMM: 0.443 +performance: 0.427 +graphic: 0.426 +arm: 0.420 +hypervisor: 0.416 +peripherals: 0.413 +x86: 0.412 +assembly: 0.391 +alpha: 0.390 +risc-v: 0.379 +files: 0.375 +architecture: 0.375 +semantic: 0.374 +PID: 0.370 +vnc: 0.367 +boot: 0.345 +i386: 0.332 +network: 0.322 +socket: 0.322 +kernel: 0.275 + +[Qemu-devel] [BUG] I/O thread segfault for QEMU on s390x + +Hi, +I have been noticing some segfaults for QEMU on s390x, and I have been +hitting this issue quite reliably (at least once in 10 runs of a test +case). The qemu version is 2.11.50, and I have systemd created coredumps +when this happens. 
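+
+Aside: the backtraces below were extracted from those systemd coredumps.
+Assuming systemd-coredump defaults, something like the following recovers
+them (the match string is illustrative):
+
+$ coredumpctl list qemu-system-s390x
+$ coredumpctl gdb qemu-system-s390x
+(gdb) thread apply all bt
+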
+ +Here is a back trace of the segfaulting thread: + + +#0 0x000003ffafed202c in swapcontext () from /lib64/libc.so.6 +#1 0x000002aa355c02ee in qemu_coroutine_new () at +util/coroutine-ucontext.c:164 +#2 0x000002aa355bec34 in qemu_coroutine_create +(address@hidden <blk_aio_read_entry>, +address@hidden) at util/qemu-coroutine.c:76 +#3 0x000002aa35510262 in blk_aio_prwv (blk=0x2aa65fbefa0, +offset=<optimized out>, bytes=<optimized out>, qiov=0x3ffa002a9c0, +address@hidden <blk_aio_read_entry>, flags=0, +cb=0x2aa35340a50 <virtio_blk_rw_complete>, opaque=0x3ffa002a960) at +block/block-backend.c:1299 +#4 0x000002aa35510376 in blk_aio_preadv (blk=<optimized out>, +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +cb=<optimized out>, opaque=0x3ffa002a960) at block/block-backend.c:1392 +#5 0x000002aa3534114e in submit_requests (niov=<optimized out>, +num_reqs=<optimized out>, start=<optimized out>, mrb=<optimized out>, +blk=<optimized out>) at +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:372 +#6 virtio_blk_submit_multireq (blk=<optimized out>, +address@hidden) at +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:402 +#7 0x000002aa353422e0 in virtio_blk_handle_vq (s=0x2aa6611e7d8, +vq=0x3ffb0f5f010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +#8 0x000002aa3536655a in virtio_queue_notify_aio_vq +(address@hidden) at +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +#9 0x000002aa35366cd6 in virtio_queue_notify_aio_vq (vq=0x3ffb0f5f010) +at /usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1511 +#10 virtio_queue_host_notifier_aio_poll (opaque=0x3ffb0f5f078) at +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:2409 +#11 0x000002aa355a8ba4 in run_poll_handlers_once +(address@hidden) at util/aio-posix.c:497 +#12 0x000002aa355a9b74 in run_poll_handlers (max_ns=<optimized out>, +ctx=0x2aa65f99310) at util/aio-posix.c:534 +#13 try_poll_mode (blocking=true, ctx=0x2aa65f99310) at util/aio-posix.c:562 +#14 aio_poll (ctx=0x2aa65f99310, address@hidden) at +util/aio-posix.c:602 +#15 0x000002aa353d2d0a in iothread_run (opaque=0x2aa65f990f0) at +iothread.c:60 +#16 0x000003ffb0f07e82 in start_thread () from /lib64/libpthread.so.0 +#17 0x000003ffaff91596 in thread_start () from /lib64/libc.so.6 +I don't have much knowledge about i/o threads and the block layer code +in QEMU, so I would like to report to the community about this issue. +I believe this very similar to the bug that I reported upstream couple +of days ago +( +https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg04452.html +). +Any help would be greatly appreciated. + +Thanks +Farhan + +On Thu, Mar 1, 2018 at 10:33 PM, Farhan Ali <address@hidden> wrote: +> +Hi, +> +> +I have been noticing some segfaults for QEMU on s390x, and I have been +> +hitting this issue quite reliably (at least once in 10 runs of a test case). +> +The qemu version is 2.11.50, and I have systemd created coredumps +> +when this happens. +Can you describe the test case or suggest how to reproduce it for us? + +Fam + +On 03/02/2018 01:13 AM, Fam Zheng wrote: +On Thu, Mar 1, 2018 at 10:33 PM, Farhan Ali <address@hidden> wrote: +Hi, + +I have been noticing some segfaults for QEMU on s390x, and I have been +hitting this issue quite reliably (at least once in 10 runs of a test case). +The qemu version is 2.11.50, and I have systemd created coredumps +when this happens. +Can you describe the test case or suggest how to reproduce it for us? + +Fam +The test case is with a single guest, running a memory intensive +workload. 
The guest has 8 vpcus and 4G of memory. +Here is the qemu command line, if that helps: + +/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ +-S -object +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes +\ +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ +-m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ +-object iothread,id=iothread1 -object iothread,id=iothread2 -uuid +b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ +-display none -no-user-config -nodefaults -chardev +socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait +-mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc +-no-shutdown \ +-boot strict=on -drive +file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 +-drive +file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 +-netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 +-chardev pty,id=charconsole0 -device +sclpconsole,chardev=charconsole0,id=console0 -device +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on +Please let me know if I need to provide any other information. + +Thanks +Farhan + +On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: +> +Hi, +> +> +I have been noticing some segfaults for QEMU on s390x, and I have been +> +hitting this issue quite reliably (at least once in 10 runs of a test case). +> +The qemu version is 2.11.50, and I have systemd created coredumps +> +when this happens. +> +> +Here is a back trace of the segfaulting thread: +The backtrace looks normal. + +Please post the QEMU command-line and the details of the segfault (which +memory access faulted?). 
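+
+Aside: for the "which memory access faulted" part, standard gdb against the
+core answers it; commands only, output omitted:
+
+(gdb) x/i $pc          # the faulting instruction
+(gdb) info registers   # the register operands it dereferenced
+(gdb) bt full          # locals in every frame
+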
+ +> +#0 0x000003ffafed202c in swapcontext () from /lib64/libc.so.6 +> +#1 0x000002aa355c02ee in qemu_coroutine_new () at +> +util/coroutine-ucontext.c:164 +> +#2 0x000002aa355bec34 in qemu_coroutine_create +> +(address@hidden <blk_aio_read_entry>, +> +address@hidden) at util/qemu-coroutine.c:76 +> +#3 0x000002aa35510262 in blk_aio_prwv (blk=0x2aa65fbefa0, offset=<optimized +> +out>, bytes=<optimized out>, qiov=0x3ffa002a9c0, +> +address@hidden <blk_aio_read_entry>, flags=0, +> +cb=0x2aa35340a50 <virtio_blk_rw_complete>, opaque=0x3ffa002a960) at +> +block/block-backend.c:1299 +> +#4 0x000002aa35510376 in blk_aio_preadv (blk=<optimized out>, +> +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +> +cb=<optimized out>, opaque=0x3ffa002a960) at block/block-backend.c:1392 +> +#5 0x000002aa3534114e in submit_requests (niov=<optimized out>, +> +num_reqs=<optimized out>, start=<optimized out>, mrb=<optimized out>, +> +blk=<optimized out>) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:372 +> +#6 virtio_blk_submit_multireq (blk=<optimized out>, +> +address@hidden) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:402 +> +#7 0x000002aa353422e0 in virtio_blk_handle_vq (s=0x2aa6611e7d8, +> +vq=0x3ffb0f5f010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +> +#8 0x000002aa3536655a in virtio_queue_notify_aio_vq +> +(address@hidden) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +> +#9 0x000002aa35366cd6 in virtio_queue_notify_aio_vq (vq=0x3ffb0f5f010) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1511 +> +#10 virtio_queue_host_notifier_aio_poll (opaque=0x3ffb0f5f078) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:2409 +> +#11 0x000002aa355a8ba4 in run_poll_handlers_once +> +(address@hidden) at util/aio-posix.c:497 +> +#12 0x000002aa355a9b74 in run_poll_handlers (max_ns=<optimized out>, +> +ctx=0x2aa65f99310) at util/aio-posix.c:534 +> +#13 try_poll_mode (blocking=true, ctx=0x2aa65f99310) at util/aio-posix.c:562 +> +#14 aio_poll (ctx=0x2aa65f99310, address@hidden) at +> +util/aio-posix.c:602 +> +#15 0x000002aa353d2d0a in iothread_run (opaque=0x2aa65f990f0) at +> +iothread.c:60 +> +#16 0x000003ffb0f07e82 in start_thread () from /lib64/libpthread.so.0 +> +#17 0x000003ffaff91596 in thread_start () from /lib64/libc.so.6 +> +> +> +I don't have much knowledge about i/o threads and the block layer code in +> +QEMU, so I would like to report to the community about this issue. +> +I believe this very similar to the bug that I reported upstream couple of +> +days ago +> +( +https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg04452.html +). +> +> +Any help would be greatly appreciated. +> +> +Thanks +> +Farhan +> +signature.asc +Description: +PGP signature + +On 03/02/2018 04:23 AM, Stefan Hajnoczi wrote: +On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: +Hi, + +I have been noticing some segfaults for QEMU on s390x, and I have been +hitting this issue quite reliably (at least once in 10 runs of a test case). +The qemu version is 2.11.50, and I have systemd created coredumps +when this happens. + +Here is a back trace of the segfaulting thread: +The backtrace looks normal. + +Please post the QEMU command-line and the details of the segfault (which +memory access faulted?). 
+I was able to create another crash today and here is the qemu comand line + +/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ +-S -object +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes +\ +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ +-m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ +-object iothread,id=iothread1 -object iothread,id=iothread2 -uuid +b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ +-display none -no-user-config -nodefaults -chardev +socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait +-mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc +-no-shutdown \ +-boot strict=on -drive +file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 +-drive +file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 +-netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 +-chardev pty,id=charconsole0 -device +sclpconsole,chardev=charconsole0,id=console0 -device +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on +This the latest back trace on the segfaulting thread, and it seems to +segfault in swapcontext. +Program terminated with signal SIGSEGV, Segmentation fault. +#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 + + +This is the remaining back trace: + +#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 +#1 0x000002aa33b45566 in qemu_coroutine_new () at +util/coroutine-ucontext.c:164 +#2 0x000002aa33b43eac in qemu_coroutine_create +(address@hidden <blk_aio_write_entry>, +address@hidden) at util/qemu-coroutine.c:76 +#3 0x000002aa33a954da in blk_aio_prwv (blk=0x2aa4f0efda0, +offset=<optimized out>, bytes=<optimized out>, qiov=0x3ff74019080, +address@hidden <blk_aio_write_entry>, flags=0, +cb=0x2aa338c62e8 <virtio_blk_rw_complete>, opaque=0x3ff74019020) at +block/block-backend.c:1299 +#4 0x000002aa33a9563e in blk_aio_pwritev (blk=<optimized out>, +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +cb=<optimized out>, opaque=0x3ff74019020) at block/block-backend.c:1400 +#5 0x000002aa338c6a38 in submit_requests (niov=<optimized out>, +num_reqs=1, start=<optimized out>, mrb=0x3ff831fe6e0, blk=<optimized +out>) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:369 +#6 virtio_blk_submit_multireq (blk=<optimized out>, +address@hidden) at +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:426 +#7 0x000002aa338c7b78 in virtio_blk_handle_vq (s=0x2aa4f2507c8, +vq=0x3ff869df010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +#8 0x000002aa338ebdf2 in virtio_queue_notify_aio_vq (vq=0x3ff869df010) +at /usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +#9 0x000002aa33b2df46 in aio_dispatch_handlers +(address@hidden) at util/aio-posix.c:406 +#10 0x000002aa33b2eb50 in aio_poll (ctx=0x2aa4f0ca050, +address@hidden) at util/aio-posix.c:692 +#11 0x000002aa33957f6a in iothread_run (opaque=0x2aa4f0c9630) at +iothread.c:60 +#12 0x000003ff86987e82 in start_thread () from /lib64/libpthread.so.0 +#13 0x000003ff85a11596 in thread_start () from /lib64/libc.so.6 +Backtrace stopped: previous frame 
identical to this frame (corrupt stack?) + +On Fri, Mar 02, 2018 at 10:30:57AM -0500, Farhan Ali wrote: +> +> +> +On 03/02/2018 04:23 AM, Stefan Hajnoczi wrote: +> +> On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: +> +> > Hi, +> +> > +> +> > I have been noticing some segfaults for QEMU on s390x, and I have been +> +> > hitting this issue quite reliably (at least once in 10 runs of a test +> +> > case). +> +> > The qemu version is 2.11.50, and I have systemd created coredumps +> +> > when this happens. +> +> > +> +> > Here is a back trace of the segfaulting thread: +> +> The backtrace looks normal. +> +> +> +> Please post the QEMU command-line and the details of the segfault (which +> +> memory access faulted?). +> +> +> +> +> +I was able to create another crash today and here is the qemu comand line +> +> +/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ +> +-S -object +> +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes +> +\ +> +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ +> +-m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ +> +-object iothread,id=iothread1 -object iothread,id=iothread2 -uuid +> +b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ +> +-display none -no-user-config -nodefaults -chardev +> +socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait +> +> +-mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown +> +\ +> +-boot strict=on -drive +> +file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +> +-device +> +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 +> +-drive +> +file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native +> +-device +> +virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 +> +-netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device +> +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 +> +-chardev pty,id=charconsole0 -device +> +sclpconsole,chardev=charconsole0,id=console0 -device +> +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on +> +> +> +This the latest back trace on the segfaulting thread, and it seems to +> +segfault in swapcontext. +> +> +Program terminated with signal SIGSEGV, Segmentation fault. +> +#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 +Please include the following gdb output: + + (gdb) disas swapcontext + (gdb) i r + +That way it's possible to see which instruction faulted and which +registers were being accessed. 
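+
+Aside: the top frames of these traces are QEMU's ucontext-based coroutines.
+A minimal, self-contained illustration of the getcontext/makecontext/
+swapcontext dance (not QEMU's coroutine-ucontext.c, which is more involved):
+
+#include <stdio.h>
+#include <ucontext.h>
+
+static ucontext_t caller, coro;
+
+static void coro_fn(void)
+{
+    printf("in coroutine\n");
+    swapcontext(&coro, &caller);    /* yield, cf. qemu_coroutine_yield() */
+}
+
+int main(void)
+{
+    static char stack[64 * 1024];
+
+    getcontext(&coro);
+    coro.uc_stack.ss_sp = stack;
+    coro.uc_stack.ss_size = sizeof(stack);
+    coro.uc_link = &caller;
+    makecontext(&coro, coro_fn, 0);
+    swapcontext(&caller, &coro);    /* enter, cf. qemu_coroutine_new() */
+    printf("back in caller\n");
+    return 0;
+}
+
+Each swapcontext() saves and restores the full register state, and the
+disassembly further down shows glibc's s390x version keeping the target
+context pointer in a register across an internal rt_sigprocmask system
+call, which is exactly where this crash bites.
+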
+ +> +This is the remaining back trace: +> +> +#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 +> +#1 0x000002aa33b45566 in qemu_coroutine_new () at +> +util/coroutine-ucontext.c:164 +> +#2 0x000002aa33b43eac in qemu_coroutine_create +> +(address@hidden <blk_aio_write_entry>, +> +address@hidden) at util/qemu-coroutine.c:76 +> +#3 0x000002aa33a954da in blk_aio_prwv (blk=0x2aa4f0efda0, offset=<optimized +> +out>, bytes=<optimized out>, qiov=0x3ff74019080, +> +address@hidden <blk_aio_write_entry>, flags=0, +> +cb=0x2aa338c62e8 <virtio_blk_rw_complete>, opaque=0x3ff74019020) at +> +block/block-backend.c:1299 +> +#4 0x000002aa33a9563e in blk_aio_pwritev (blk=<optimized out>, +> +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +> +cb=<optimized out>, opaque=0x3ff74019020) at block/block-backend.c:1400 +> +#5 0x000002aa338c6a38 in submit_requests (niov=<optimized out>, num_reqs=1, +> +start=<optimized out>, mrb=0x3ff831fe6e0, blk=<optimized out>) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:369 +> +#6 virtio_blk_submit_multireq (blk=<optimized out>, +> +address@hidden) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:426 +> +#7 0x000002aa338c7b78 in virtio_blk_handle_vq (s=0x2aa4f2507c8, +> +vq=0x3ff869df010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +> +#8 0x000002aa338ebdf2 in virtio_queue_notify_aio_vq (vq=0x3ff869df010) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +> +#9 0x000002aa33b2df46 in aio_dispatch_handlers +> +(address@hidden) at util/aio-posix.c:406 +> +#10 0x000002aa33b2eb50 in aio_poll (ctx=0x2aa4f0ca050, +> +address@hidden) at util/aio-posix.c:692 +> +#11 0x000002aa33957f6a in iothread_run (opaque=0x2aa4f0c9630) at +> +iothread.c:60 +> +#12 0x000003ff86987e82 in start_thread () from /lib64/libpthread.so.0 +> +#13 0x000003ff85a11596 in thread_start () from /lib64/libc.so.6 +> +Backtrace stopped: previous frame identical to this frame (corrupt stack?) +> +signature.asc +Description: +PGP signature + +On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +Please include the following gdb output: + + (gdb) disas swapcontext + (gdb) i r + +That way it's possible to see which instruction faulted and which +registers were being accessed. +here is the disas out for swapcontext, this is on a coredump with +debugging symbols enabled for qemu. So the addresses from the previous +dump is a little different. 
+(gdb) disas swapcontext +Dump of assembler code for function swapcontext: + 0x000003ff90751fb8 <+0>: lgr %r1,%r2 + 0x000003ff90751fbc <+4>: lgr %r0,%r3 + 0x000003ff90751fc0 <+8>: stfpc 248(%r1) + 0x000003ff90751fc4 <+12>: std %f0,256(%r1) + 0x000003ff90751fc8 <+16>: std %f1,264(%r1) + 0x000003ff90751fcc <+20>: std %f2,272(%r1) + 0x000003ff90751fd0 <+24>: std %f3,280(%r1) + 0x000003ff90751fd4 <+28>: std %f4,288(%r1) + 0x000003ff90751fd8 <+32>: std %f5,296(%r1) + 0x000003ff90751fdc <+36>: std %f6,304(%r1) + 0x000003ff90751fe0 <+40>: std %f7,312(%r1) + 0x000003ff90751fe4 <+44>: std %f8,320(%r1) + 0x000003ff90751fe8 <+48>: std %f9,328(%r1) + 0x000003ff90751fec <+52>: std %f10,336(%r1) + 0x000003ff90751ff0 <+56>: std %f11,344(%r1) + 0x000003ff90751ff4 <+60>: std %f12,352(%r1) + 0x000003ff90751ff8 <+64>: std %f13,360(%r1) + 0x000003ff90751ffc <+68>: std %f14,368(%r1) + 0x000003ff90752000 <+72>: std %f15,376(%r1) + 0x000003ff90752004 <+76>: slgr %r2,%r2 + 0x000003ff90752008 <+80>: stam %a0,%a15,184(%r1) + 0x000003ff9075200c <+84>: stmg %r0,%r15,56(%r1) + 0x000003ff90752012 <+90>: la %r2,2 + 0x000003ff90752016 <+94>: lgr %r5,%r0 + 0x000003ff9075201a <+98>: la %r3,384(%r5) + 0x000003ff9075201e <+102>: la %r4,384(%r1) + 0x000003ff90752022 <+106>: lghi %r5,8 + 0x000003ff90752026 <+110>: svc 175 + 0x000003ff90752028 <+112>: lgr %r5,%r0 +=> 0x000003ff9075202c <+116>: lfpc 248(%r5) + 0x000003ff90752030 <+120>: ld %f0,256(%r5) + 0x000003ff90752034 <+124>: ld %f1,264(%r5) + 0x000003ff90752038 <+128>: ld %f2,272(%r5) + 0x000003ff9075203c <+132>: ld %f3,280(%r5) + 0x000003ff90752040 <+136>: ld %f4,288(%r5) + 0x000003ff90752044 <+140>: ld %f5,296(%r5) + 0x000003ff90752048 <+144>: ld %f6,304(%r5) + 0x000003ff9075204c <+148>: ld %f7,312(%r5) + 0x000003ff90752050 <+152>: ld %f8,320(%r5) + 0x000003ff90752054 <+156>: ld %f9,328(%r5) + 0x000003ff90752058 <+160>: ld %f10,336(%r5) + 0x000003ff9075205c <+164>: ld %f11,344(%r5) + 0x000003ff90752060 <+168>: ld %f12,352(%r5) + 0x000003ff90752064 <+172>: ld %f13,360(%r5) + 0x000003ff90752068 <+176>: ld %f14,368(%r5) + 0x000003ff9075206c <+180>: ld %f15,376(%r5) + 0x000003ff90752070 <+184>: lam %a2,%a15,192(%r5) + 0x000003ff90752074 <+188>: lmg %r0,%r15,56(%r5) + 0x000003ff9075207a <+194>: br %r14 +End of assembler dump. + +(gdb) i r +r0 0x0 0 +r1 0x3ff8fe7de40 4396165881408 +r2 0x0 0 +r3 0x3ff8fe7e1c0 4396165882304 +r4 0x3ff8fe7dfc0 4396165881792 +r5 0x0 0 +r6 0xffffffff88004880 18446744071696304256 +r7 0x3ff880009e0 4396033247712 +r8 0x27ff89000 10736930816 +r9 0x3ff88001460 4396033250400 +r10 0x1000 4096 +r11 0x1261be0 19274720 +r12 0x3ff88001e00 4396033252864 +r13 0x14d0bc0 21826496 +r14 0x1312ac8 19999432 +r15 0x3ff8fe7dc80 4396165880960 +pc 0x3ff9075202c 0x3ff9075202c <swapcontext+116> +cc 0x2 2 + +On 03/05/2018 07:45 PM, Farhan Ali wrote: +> +> +> +On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +> +> Please include the following gdb output: +> +> +> +>   (gdb) disas swapcontext +> +>   (gdb) i r +> +> +> +> That way it's possible to see which instruction faulted and which +> +> registers were being accessed. +> +> +> +here is the disas out for swapcontext, this is on a coredump with debugging +> +symbols enabled for qemu. So the addresses from the previous dump is a little +> +different. 
+> +> +> +(gdb) disas swapcontext +> +Dump of assembler code for function swapcontext: +> +  0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 +> +  0x000003ff90751fbc <+4>:   lgr   %r0,%r3 +> +  0x000003ff90751fc0 <+8>:   stfpc   248(%r1) +> +  0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) +> +  0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) +> +  0x000003ff90751fcc <+20>:   std   %f2,272(%r1) +> +  0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) +> +  0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) +> +  0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) +> +  0x000003ff90751fdc <+36>:   std   %f6,304(%r1) +> +  0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) +> +  0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) +> +  0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) +> +  0x000003ff90751fec <+52>:   std   %f10,336(%r1) +> +  0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) +> +  0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) +> +  0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) +> +  0x000003ff90751ffc <+68>:   std   %f14,368(%r1) +> +  0x000003ff90752000 <+72>:   std   %f15,376(%r1) +> +  0x000003ff90752004 <+76>:   slgr   %r2,%r2 +> +  0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) +> +  0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) +> +  0x000003ff90752012 <+90>:   la   %r2,2 +> +  0x000003ff90752016 <+94>:   lgr   %r5,%r0 +> +  0x000003ff9075201a <+98>:   la   %r3,384(%r5) +> +  0x000003ff9075201e <+102>:   la   %r4,384(%r1) +> +  0x000003ff90752022 <+106>:   lghi   %r5,8 +> +  0x000003ff90752026 <+110>:   svc   175 +sys_rt_sigprocmask. r0 should not be changed by the system call. + +> +  0x000003ff90752028 <+112>:   lgr   %r5,%r0 +> +=> 0x000003ff9075202c <+116>:   lfpc   248(%r5) +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +2nd parameter to this +function). Now this is odd. + +> +  0x000003ff90752030 <+120>:   ld   %f0,256(%r5) +> +  0x000003ff90752034 <+124>:   ld   %f1,264(%r5) +> +  0x000003ff90752038 <+128>:   ld   %f2,272(%r5) +> +  0x000003ff9075203c <+132>:   ld   %f3,280(%r5) +> +  0x000003ff90752040 <+136>:   ld   %f4,288(%r5) +> +  0x000003ff90752044 <+140>:   ld   %f5,296(%r5) +> +  0x000003ff90752048 <+144>:   ld   %f6,304(%r5) +> +  0x000003ff9075204c <+148>:   ld   %f7,312(%r5) +> +  0x000003ff90752050 <+152>:   ld   %f8,320(%r5) +> +  0x000003ff90752054 <+156>:   ld   %f9,328(%r5) +> +  0x000003ff90752058 <+160>:   ld   %f10,336(%r5) +> +  0x000003ff9075205c <+164>:   ld   %f11,344(%r5) +> +  0x000003ff90752060 <+168>:   ld   %f12,352(%r5) +> +  0x000003ff90752064 <+172>:   ld   %f13,360(%r5) +> +  0x000003ff90752068 <+176>:   ld   %f14,368(%r5) +> +  0x000003ff9075206c <+180>:   ld   %f15,376(%r5) +> +  0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) +> +  0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) +> +  0x000003ff9075207a <+194>:   br   %r14 +> +End of assembler dump. 
+> +> +(gdb) i r +> +r0            0x0   0 +> +r1            0x3ff8fe7de40   4396165881408 +> +r2            0x0   0 +> +r3            0x3ff8fe7e1c0   4396165882304 +> +r4            0x3ff8fe7dfc0   4396165881792 +> +r5            0x0   0 +> +r6            0xffffffff88004880   18446744071696304256 +> +r7            0x3ff880009e0   4396033247712 +> +r8            0x27ff89000   10736930816 +> +r9            0x3ff88001460   4396033250400 +> +r10           0x1000   4096 +> +r11           0x1261be0   19274720 +> +r12           0x3ff88001e00   4396033252864 +> +r13           0x14d0bc0   21826496 +> +r14           0x1312ac8   19999432 +> +r15           0x3ff8fe7dc80   4396165880960 +> +pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> +> +cc            0x2   2 + +On 5 March 2018 at 18:54, Christian Borntraeger <address@hidden> wrote: +> +> +> +On 03/05/2018 07:45 PM, Farhan Ali wrote: +> +> 0x000003ff90752026 <+110>: svc 175 +> +> +sys_rt_sigprocmask. r0 should not be changed by the system call. +> +> +> 0x000003ff90752028 <+112>: lgr %r5,%r0 +> +> => 0x000003ff9075202c <+116>: lfpc 248(%r5) +> +> +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +> +2nd parameter to this +> +function). Now this is odd. +...particularly given that the only place we call swapcontext() +the second parameter is always the address of a local variable +and can't be 0... + +thanks +-- PMM + +Do you happen to run with a recent host kernel that has + +commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 + s390: scrub registers on kernel entry and KVM exit + + + + + +Can you run with this on top +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 13a133a6015c..d6dc0e5e8f74 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -426,13 +426,13 @@ ENTRY(system_call) + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + stmg %r0,%r7,__PT_R0(%r11) +- # clear user controlled register to prevent speculative use +- xgr %r0,%r0 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) + .Lsysc_do_svc: ++ # clear user controlled register to prevent speculative use ++ xgr %r0,%r0 + # load address of system call table + lg %r10,__THREAD_sysc_table(%r13,%r12) + llgh %r8,__PT_INT_CODE+2(%r11) + + +To me it looks like that the critical section cleanup (interrupt during system +call entry) might +save the registers again into ptregs but we have already zeroed out r0. +This patch moves the clearing of r0 after sysc_do_svc, which should fix the +critical +section cleanup. + +Adding Martin and Heiko. Will spin a patch. + + +On 03/05/2018 07:54 PM, Christian Borntraeger wrote: +> +> +> +On 03/05/2018 07:45 PM, Farhan Ali wrote: +> +> +> +> +> +> On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +> +>> Please include the following gdb output: +> +>> +> +>>   (gdb) disas swapcontext +> +>>   (gdb) i r +> +>> +> +>> That way it's possible to see which instruction faulted and which +> +>> registers were being accessed. +> +> +> +> +> +> here is the disas out for swapcontext, this is on a coredump with debugging +> +> symbols enabled for qemu. So the addresses from the previous dump is a +> +> little different. 
+> +> +> +> +> +> (gdb) disas swapcontext +> +> Dump of assembler code for function swapcontext: +> +>   0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 +> +>   0x000003ff90751fbc <+4>:   lgr   %r0,%r3 +> +>   0x000003ff90751fc0 <+8>:   stfpc   248(%r1) +> +>   0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) +> +>   0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) +> +>   0x000003ff90751fcc <+20>:   std   %f2,272(%r1) +> +>   0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) +> +>   0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) +> +>   0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) +> +>   0x000003ff90751fdc <+36>:   std   %f6,304(%r1) +> +>   0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) +> +>   0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) +> +>   0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) +> +>   0x000003ff90751fec <+52>:   std   %f10,336(%r1) +> +>   0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) +> +>   0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) +> +>   0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) +> +>   0x000003ff90751ffc <+68>:   std   %f14,368(%r1) +> +>   0x000003ff90752000 <+72>:   std   %f15,376(%r1) +> +>   0x000003ff90752004 <+76>:   slgr   %r2,%r2 +> +>   0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) +> +>   0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) +> +>   0x000003ff90752012 <+90>:   la   %r2,2 +> +>   0x000003ff90752016 <+94>:   lgr   %r5,%r0 +> +>   0x000003ff9075201a <+98>:   la   %r3,384(%r5) +> +>   0x000003ff9075201e <+102>:   la   %r4,384(%r1) +> +>   0x000003ff90752022 <+106>:   lghi   %r5,8 +> +>   0x000003ff90752026 <+110>:   svc   175 +> +> +sys_rt_sigprocmask. r0 should not be changed by the system call. +> +> +>   0x000003ff90752028 <+112>:   lgr   %r5,%r0 +> +> => 0x000003ff9075202c <+116>:   lfpc   248(%r5) +> +> +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +> +2nd parameter to this +> +function). Now this is odd. +> +> +>   0x000003ff90752030 <+120>:   ld   %f0,256(%r5) +> +>   0x000003ff90752034 <+124>:   ld   %f1,264(%r5) +> +>   0x000003ff90752038 <+128>:   ld   %f2,272(%r5) +> +>   0x000003ff9075203c <+132>:   ld   %f3,280(%r5) +> +>   0x000003ff90752040 <+136>:   ld   %f4,288(%r5) +> +>   0x000003ff90752044 <+140>:   ld   %f5,296(%r5) +> +>   0x000003ff90752048 <+144>:   ld   %f6,304(%r5) +> +>   0x000003ff9075204c <+148>:   ld   %f7,312(%r5) +> +>   0x000003ff90752050 <+152>:   ld   %f8,320(%r5) +> +>   0x000003ff90752054 <+156>:   ld   %f9,328(%r5) +> +>   0x000003ff90752058 <+160>:   ld   %f10,336(%r5) +> +>   0x000003ff9075205c <+164>:   ld   %f11,344(%r5) +> +>   0x000003ff90752060 <+168>:   ld   %f12,352(%r5) +> +>   0x000003ff90752064 <+172>:   ld   %f13,360(%r5) +> +>   0x000003ff90752068 <+176>:   ld   %f14,368(%r5) +> +>   0x000003ff9075206c <+180>:   ld   %f15,376(%r5) +> +>   0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) +> +>   0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) +> +>   0x000003ff9075207a <+194>:   br   %r14 +> +> End of assembler dump. 
+> +> +> +> (gdb) i r +> +> r0            0x0   0 +> +> r1            0x3ff8fe7de40   4396165881408 +> +> r2            0x0   0 +> +> r3            0x3ff8fe7e1c0   4396165882304 +> +> r4            0x3ff8fe7dfc0   4396165881792 +> +> r5            0x0   0 +> +> r6            0xffffffff88004880   18446744071696304256 +> +> r7            0x3ff880009e0   4396033247712 +> +> r8            0x27ff89000   10736930816 +> +> r9            0x3ff88001460   4396033250400 +> +> r10           0x1000   4096 +> +> r11           0x1261be0   19274720 +> +> r12           0x3ff88001e00   4396033252864 +> +> r13           0x14d0bc0   21826496 +> +> r14           0x1312ac8   19999432 +> +> r15           0x3ff8fe7dc80   4396165880960 +> +> pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> +> +> cc            0x2   2 + +On 03/05/2018 02:08 PM, Christian Borntraeger wrote: +Do you happen to run with a recent host kernel that has + +commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 + s390: scrub registers on kernel entry and KVM exit +Yes. +Can you run with this on top +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 13a133a6015c..d6dc0e5e8f74 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -426,13 +426,13 @@ ENTRY(system_call) + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + stmg %r0,%r7,__PT_R0(%r11) +- # clear user controlled register to prevent speculative use +- xgr %r0,%r0 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) + .Lsysc_do_svc: ++ # clear user controlled register to prevent speculative use ++ xgr %r0,%r0 + # load address of system call table + lg %r10,__THREAD_sysc_table(%r13,%r12) + llgh %r8,__PT_INT_CODE+2(%r11) + + +To me it looks like that the critical section cleanup (interrupt during system +call entry) might +save the registers again into ptregs but we have already zeroed out r0. +This patch moves the clearing of r0 after sysc_do_svc, which should fix the +critical +section cleanup. +Okay I will run with this. +Adding Martin and Heiko. Will spin a patch. + + +On 03/05/2018 07:54 PM, Christian Borntraeger wrote: +On 03/05/2018 07:45 PM, Farhan Ali wrote: +On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +Please include the following gdb output: + +   (gdb) disas swapcontext +   (gdb) i r + +That way it's possible to see which instruction faulted and which +registers were being accessed. +here is the disas out for swapcontext, this is on a coredump with debugging +symbols enabled for qemu. So the addresses from the previous dump is a little +different. 
+ + +(gdb) disas swapcontext +Dump of assembler code for function swapcontext: +   0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 +   0x000003ff90751fbc <+4>:   lgr   %r0,%r3 +   0x000003ff90751fc0 <+8>:   stfpc   248(%r1) +   0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) +   0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) +   0x000003ff90751fcc <+20>:   std   %f2,272(%r1) +   0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) +   0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) +   0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) +   0x000003ff90751fdc <+36>:   std   %f6,304(%r1) +   0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) +   0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) +   0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) +   0x000003ff90751fec <+52>:   std   %f10,336(%r1) +   0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) +   0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) +   0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) +   0x000003ff90751ffc <+68>:   std   %f14,368(%r1) +   0x000003ff90752000 <+72>:   std   %f15,376(%r1) +   0x000003ff90752004 <+76>:   slgr   %r2,%r2 +   0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) +   0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) +   0x000003ff90752012 <+90>:   la   %r2,2 +   0x000003ff90752016 <+94>:   lgr   %r5,%r0 +   0x000003ff9075201a <+98>:   la   %r3,384(%r5) +   0x000003ff9075201e <+102>:   la   %r4,384(%r1) +   0x000003ff90752022 <+106>:   lghi   %r5,8 +   0x000003ff90752026 <+110>:   svc   175 +sys_rt_sigprocmask. r0 should not be changed by the system call. +  0x000003ff90752028 <+112>:   lgr   %r5,%r0 +=> 0x000003ff9075202c <+116>:   lfpc   248(%r5) +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +2nd parameter to this +function). Now this is odd. +  0x000003ff90752030 <+120>:   ld   %f0,256(%r5) +   0x000003ff90752034 <+124>:   ld   %f1,264(%r5) +   0x000003ff90752038 <+128>:   ld   %f2,272(%r5) +   0x000003ff9075203c <+132>:   ld   %f3,280(%r5) +   0x000003ff90752040 <+136>:   ld   %f4,288(%r5) +   0x000003ff90752044 <+140>:   ld   %f5,296(%r5) +   0x000003ff90752048 <+144>:   ld   %f6,304(%r5) +   0x000003ff9075204c <+148>:   ld   %f7,312(%r5) +   0x000003ff90752050 <+152>:   ld   %f8,320(%r5) +   0x000003ff90752054 <+156>:   ld   %f9,328(%r5) +   0x000003ff90752058 <+160>:   ld   %f10,336(%r5) +   0x000003ff9075205c <+164>:   ld   %f11,344(%r5) +   0x000003ff90752060 <+168>:   ld   %f12,352(%r5) +   0x000003ff90752064 <+172>:   ld   %f13,360(%r5) +   0x000003ff90752068 <+176>:   ld   %f14,368(%r5) +   0x000003ff9075206c <+180>:   ld   %f15,376(%r5) +   0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) +   0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) +   0x000003ff9075207a <+194>:   br   %r14 +End of assembler dump. 
+ +(gdb) i r +r0            0x0   0 +r1            0x3ff8fe7de40   4396165881408 +r2            0x0   0 +r3            0x3ff8fe7e1c0   4396165882304 +r4            0x3ff8fe7dfc0   4396165881792 +r5            0x0   0 +r6            0xffffffff88004880   18446744071696304256 +r7            0x3ff880009e0   4396033247712 +r8            0x27ff89000   10736930816 +r9            0x3ff88001460   4396033250400 +r10           0x1000   4096 +r11           0x1261be0   19274720 +r12           0x3ff88001e00   4396033252864 +r13           0x14d0bc0   21826496 +r14           0x1312ac8   19999432 +r15           0x3ff8fe7dc80   4396165880960 +pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> +cc            0x2   2 + +On Mon, 5 Mar 2018 20:08:45 +0100 +Christian Borntraeger <address@hidden> wrote: + +> +Do you happen to run with a recent host kernel that has +> +> +commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 +> +s390: scrub registers on kernel entry and KVM exit +> +> +Can you run with this on top +> +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +> +index 13a133a6015c..d6dc0e5e8f74 100644 +> +--- a/arch/s390/kernel/entry.S +> ++++ b/arch/s390/kernel/entry.S +> +@@ -426,13 +426,13 @@ ENTRY(system_call) +> +UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER +> +BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP +> +stmg %r0,%r7,__PT_R0(%r11) +> +- # clear user controlled register to prevent speculative use +> +- xgr %r0,%r0 +> +mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC +> +mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW +> +mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC +> +stg %r14,__PT_FLAGS(%r11) +> +.Lsysc_do_svc: +> ++ # clear user controlled register to prevent speculative use +> ++ xgr %r0,%r0 +> +# load address of system call table +> +lg %r10,__THREAD_sysc_table(%r13,%r12) +> +llgh %r8,__PT_INT_CODE+2(%r11) +> +> +> +To me it looks like that the critical section cleanup (interrupt during +> +system call entry) might +> +save the registers again into ptregs but we have already zeroed out r0. +> +This patch moves the clearing of r0 after sysc_do_svc, which should fix the +> +critical +> +section cleanup. +> +> +Adding Martin and Heiko. Will spin a patch. +Argh, yes. Thanks Chrisitan, this is it. I have been searching for the bug +for days now. The point is that if the system call handler is interrupted +after the xgr but before .Lsysc_do_svc the code at .Lcleanup_system_call +repeats the stmg for %r0-%r7 but now %r0 is already zero. + +Please commit a patch for this and I'll will queue it up immediately. + +-- +blue skies, + Martin. + +"Reality continues to ruin my life." - Calvin. 
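+
+Aside: the invariant Martin describes (user r0 must survive a system call
+even when an interrupt lands in the entry path) can be probed from user
+space. The sketch below is hypothetical s390x-only test code; the window is
+only a few instructions wide, so it may well never fire, and it is meant
+only to illustrate what the fix restores:
+
+#include <stdio.h>
+
+int main(void)
+{
+    for (unsigned long i = 0; i < 100000000UL; i++) {
+        unsigned long after;
+
+        asm volatile(
+            "lghi %%r0,42\n\t"   /* sentinel in r0                     */
+            "svc  20\n\t"        /* getpid: the ABI preserves r0 here  */
+            "lgr  %0,%%r0\n\t"   /* read r0 back out                   */
+            : "=d" (after)
+            :
+            : "r0", "r1", "r2", "cc", "memory");
+
+        if (after != 42) {
+            printf("r0 clobbered on iteration %lu: %#lx\n", i, after);
+            return 1;
+        }
+    }
+    puts("r0 preserved across all calls");
+    return 0;
+}
+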
+
+On 03/06/2018 01:34 AM, Martin Schwidefsky wrote:
+> On Mon, 5 Mar 2018 20:08:45 +0100
+> Christian Borntraeger <address@hidden> wrote:
+>> Do you happen to run with a recent host kernel that has
+>>
+>> commit 7041d28115e91f2144f811ffe8a195c696b1e1d0
+>>     s390: scrub registers on kernel entry and KVM exit
+>>
+>> Can you run with this on top?
+>>
+>> diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
+>> index 13a133a6015c..d6dc0e5e8f74 100644
+>> --- a/arch/s390/kernel/entry.S
+>> +++ b/arch/s390/kernel/entry.S
+>> @@ -426,13 +426,13 @@ ENTRY(system_call)
+>>  	UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
+>>  	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
+>>  	stmg	%r0,%r7,__PT_R0(%r11)
+>> -	# clear user controlled register to prevent speculative use
+>> -	xgr	%r0,%r0
+>>  	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+>>  	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
+>>  	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
+>>  	stg	%r14,__PT_FLAGS(%r11)
+>>  .Lsysc_do_svc:
+>> +	# clear user controlled register to prevent speculative use
+>> +	xgr	%r0,%r0
+>>  	# load address of system call table
+>>  	lg	%r10,__THREAD_sysc_table(%r13,%r12)
+>>  	llgh	%r8,__PT_INT_CODE+2(%r11)
+>>
+>> To me it looks like the critical section cleanup (interrupt during
+>> system call entry) might save the registers again into ptregs, but we
+>> have already zeroed out r0. This patch moves the clearing of r0 after
+>> .Lsysc_do_svc, which should fix the critical section cleanup.
+>>
+>> Adding Martin and Heiko. Will spin a patch.
+>
+> Argh, yes. Thanks Christian, this is it. I have been searching for the bug
+> for days now. The point is that if the system call handler is interrupted
+> after the xgr but before .Lsysc_do_svc, the code at .Lcleanup_system_call
+> repeats the stmg for %r0-%r7, but now %r0 is already zero.
+>
+> Please commit a patch for this and I'll queue it up immediately.
+
+This patch does fix the QEMU crash. I haven't seen the crash after
+running the test case for more than a day. Thanks to everyone for taking
+a look at this problem :)
+
+Thanks
+Farhan
+
diff --git a/results/classifier/014/none/33802194 b/results/classifier/014/none/33802194
new file mode 100644
index 00000000..9bd2404f
--- /dev/null
+++ b/results/classifier/014/none/33802194
@@ -0,0 +1,4966 @@
+ppc: 0.738
+user-level: 0.737
+virtual: 0.728
+vnc: 0.728
+KVM: 0.725
+register: 0.720
+arm: 0.716
+permissions: 0.705
+VMM: 0.704
+hypervisor: 0.700
+risc-v: 0.699
+TCG: 0.697
+device: 0.691
+peripherals: 0.690
+mistranslation: 0.687
+debug: 0.681
+operating system: 0.676
+x86: 0.674
+kernel: 0.661
+performance: 0.659
+assembly: 0.657
+semantic: 0.656
+socket: 0.655
+architecture: 0.646
+network: 0.644
+graphic: 0.640
+PID: 0.636
+boot: 0.631
+alpha: 0.605
+files: 0.598
+i386: 0.539
+
+[BUG] cxl can not create region
+
+Hi list
+
+I want to test cxl functions in arm64, and found some problems I can't
+figure out.
+
+My test environment:
+
+1. build latest bios from https://github.com/tianocore/edk2.git master
+branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2)
+2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git
+master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm
+support patch:
+https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/
+3. build Linux kernel from
+https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git preview
+branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683)
+4. build latest ndctl tools from https://github.com/pmem/ndctl
+create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159)
+
+And my qemu test commands:
+
+sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \
+    -cpu max -smp 8 -nographic -no-reboot \
+    -kernel $KERNEL -bios $BIOS_BIN \
+    -drive if=none,file=$ROOTFS,format=qcow2,id=hd \
+    -device virtio-blk-pci,drive=hd \
+    -append 'root=/dev/vda1 nokaslr dyndbg="module cxl* +p"' \
+    -object memory-backend-ram,size=4G,id=mem0 \
+    -numa node,nodeid=0,cpus=0-7,memdev=mem0 \
+    -net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \
+    -object memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M \
+    -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M \
+    -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M \
+    -object memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M \
+    -object memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M \
+    -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M \
+    -object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M \
+    -object memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M \
+    -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
+    -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
+    -device cxl-upstream,bus=root_port0,id=us0 \
+    -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
+    -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \
+    -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
+    -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \
+    -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
+    -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \
+    -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
+    -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \
+    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
+
+And I have got two problems.
+
+1. When I try to create an x1 region with the command: "cxl create-region -d
+decoder0.0 -w 1 -g 4096 mem0", the kernel crashes with a null pointer
+dereference.
Crash log: + +[ 534.697324] cxl_region region0: config state: 0 +[ 534.697346] cxl_region region0: probe: -6 +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +for mem0:decoder3.0 @ 0 +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +0000:0e:00.0 for mem0:decoder3.0 @ 0 +[ 534.699405] Unable to handle kernel NULL pointer dereference at +virtual address 0000000000000000 +[ 534.701474] Mem abort info: +[ 534.701994] ESR = 0x0000000086000004 +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +[ 534.703616] SET = 0, FnV = 0 +[ 534.704174] EA = 0, S1PTW = 0 +[ 534.704803] FSC = 0x04: level 0 translation fault +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +[ 534.710301] Modules linked in: +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 534.719190] pc : 0x0 +[ 534.719928] lr : commit_store+0x118/0x2cc +[ 534.721007] sp : ffff80000aec3c30 +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: ffff0000c0c06b30 +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: ffff0000c0a29400 +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: ffff0000c0c06800 +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: 0000000000000000 +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffd41fe838 +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff0000c0906e80 +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff80000aec3bf0 +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c155a000 +[ 534.738878] Call trace: +[ 534.739368] 0x0 +[ 534.739713] dev_attr_store+0x1c/0x30 +[ 534.740186] sysfs_kf_write+0x48/0x58 +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +[ 534.741872] new_sync_write+0xdc/0x158 +[ 534.742706] vfs_write+0x1ac/0x2a8 +[ 534.743440] ksys_write+0x68/0xf0 +[ 534.744328] __arm64_sys_write+0x1c/0x28 +[ 534.745180] invoke_syscall+0x44/0xf0 +[ 534.745989] el0_svc_common+0x4c/0xfc +[ 534.746661] do_el0_svc+0x60/0xa8 +[ 534.747378] el0_svc+0x2c/0x78 +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +[ 534.748919] el0t_64_sync+0x18c/0x190 +[ 534.749629] Code: bad PC value +[ 534.750169] ---[ end trace 0000000000000000 ]--- + +2. When I want to create x4 region with command: "cxl create-region -d +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
+I got the errors below:
+
+cxl region: create_region: region0: failed to set target3 to mem3
+cxl region: cmd_create_region: created 0 regions
+
+And the kernel log is as follows:
+
+[ 60.536663] cxl_region region0: config state: 0
+[ 60.536675] cxl_region region0: probe: -6
+[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0
+[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1
+[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1
+[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1
+[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1
+[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2
+[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1
+[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1
+[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3
+[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1
+[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1
+[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4
+[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1
+[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256
+[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 for mem0:decoder3.0 @ 0
+[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512
+[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = 0000:0e:00.0 for mem0:decoder3.0 @ 0
+[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at 1
+
+I have tried to write the sysfs nodes manually, and got the same errors.
+
+Hope I can get some help here.
+
+Bob
+
+On Fri, 5 Aug 2022 10:20:23 +0800
+Bobo WL <lmw.bobo@gmail.com> wrote:
+
+> Hi list
+>
+> I want to test cxl functions in arm64, and found some problems I can't
+> figure out.
+
+Hi Bob,
+
+Glad to see people testing this code.
+
+> My test environment:
+>
+> 1. build latest bios from https://github.com/tianocore/edk2.git master
+> branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2)
+> 2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git
+> master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm
+> support patch:
+> https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/
+> 3. build Linux kernel from
+> https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git preview
+> branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683)
+> 4. build latest ndctl tools from https://github.com/pmem/ndctl
+> create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159)
+>
+> And my qemu test commands:
+>
+> sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \
+>     -cpu max -smp 8 -nographic -no-reboot \
+>     -kernel $KERNEL -bios $BIOS_BIN \
+>     -drive if=none,file=$ROOTFS,format=qcow2,id=hd \
+>     -device virtio-blk-pci,drive=hd \
+>     -append 'root=/dev/vda1 nokaslr dyndbg="module cxl* +p"' \
+>     -object memory-backend-ram,size=4G,id=mem0 \
+>     -numa node,nodeid=0,cpus=0-7,memdev=mem0 \
+>     -net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \
+>     -object memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M \
+>     -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M \
+>     -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M \
+>     -object memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M \
+>     -object memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M \
+>     -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M \
+>     -object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M \
+>     -object memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M \
+>     -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
+>     -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
+
+Probably not related to your problem, but there is a disconnect in QEMU /
+kernel assumptions around the presence of an HDM decoder when a HB only
+has a single root port. The spec allows it to be provided or not as an
+implementation choice. The kernel assumes it isn't provided; QEMU assumes
+it is.
+
+The temporary solution is to throw in a second root port on the HB and not
+connect anything to it. Longer term I may special case this so that the
+particular decoder defaults to pass-through settings in QEMU if there is
+only one root port.
+
+>     -device cxl-upstream,bus=root_port0,id=us0 \
+>     -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
+>     -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \
+>     -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
+>     -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \
+>     -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
+>     -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \
+>     -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
+>     -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \
+>     -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
+>
+> And I have got two problems.
+> 1. When I try to create an x1 region with the command: "cxl create-region -d
+> decoder0.0 -w 1 -g 4096 mem0", the kernel crashes with a null pointer
+> dereference. Crash log:
+>
+> [ 534.697324] cxl_region region0: config state: 0
+> [ 534.697346] cxl_region region0: probe: -6
+
+Seems odd this is up here. But maybe fine.
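+
+(For reproduction purposes: with the command line quoted above, the
+stop-gap described here amounts to one extra, deliberately unpopulated
+root port next to the existing one. The port/chassis/slot numbers below
+are simply values not already in use in this topology, so treat them as a
+sketch rather than a required choice:
+
+    -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
+
+Nothing is ever attached to root_port1; its only job is to give the host
+bridge more than one root port so the kernel and QEMU agree about the HDM
+decoder.)
+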
+ +> +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +> +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 534.699405] Unable to handle kernel NULL pointer dereference at +> +virtual address 0000000000000000 +> +[ 534.701474] Mem abort info: +> +[ 534.701994] ESR = 0x0000000086000004 +> +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +> +[ 534.703616] SET = 0, FnV = 0 +> +[ 534.704174] EA = 0, S1PTW = 0 +> +[ 534.704803] FSC = 0x04: level 0 translation fault +> +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +> +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +> +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +> +[ 534.710301] Modules linked in: +> +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +> +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +> +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 534.719190] pc : 0x0 +> +[ 534.719928] lr : commit_store+0x118/0x2cc +> +[ 534.721007] sp : ffff80000aec3c30 +> +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: +> +ffff0000c0c06b30 +> +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: +> +ffff0000c0a29400 +> +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: +> +ffff0000c0c06800 +> +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: +> +0000000000000000 +> +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: +> +0000ffffd41fe838 +> +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: +> +0000000000000000 +> +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : +> +0000000000000000 +> +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : +> +ffff0000c0906e80 +> +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : +> +ffff80000aec3bf0 +> +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : +> +ffff0000c155a000 +> +[ 534.738878] Call trace: +> +[ 534.739368] 0x0 +> +[ 534.739713] dev_attr_store+0x1c/0x30 +> +[ 534.740186] sysfs_kf_write+0x48/0x58 +> +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +> +[ 534.741872] new_sync_write+0xdc/0x158 +> +[ 534.742706] vfs_write+0x1ac/0x2a8 +> +[ 534.743440] ksys_write+0x68/0xf0 +> +[ 534.744328] __arm64_sys_write+0x1c/0x28 +> +[ 534.745180] invoke_syscall+0x44/0xf0 +> +[ 534.745989] el0_svc_common+0x4c/0xfc +> +[ 534.746661] do_el0_svc+0x60/0xa8 +> +[ 534.747378] el0_svc+0x2c/0x78 +> +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +> +[ 534.748919] el0t_64_sync+0x18c/0x190 +> +[ 534.749629] Code: bad PC value +> +[ 534.750169] ---[ end trace 0000000000000000 ]--- +> +> +2. When I want to create x4 region with command: "cxl create-region -d +> +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: +> +> +cxl region: create_region: region0: failed to set target3 to mem3 +> +cxl region: cmd_create_region: created 0 regions +> +> +And kernel log as below: +> +[ 60.536663] cxl_region region0: config state: 0 +> +[ 60.536675] cxl_region region0: probe: -6 +> +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +> +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +> +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +> +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +> +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +> +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +> +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +> +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +> +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +> +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +This looks like off by 1 that should be fixed in the below mentioned +cxl/pending branch. That ig should be 256. Note the fix was +for a test case with a fat HB and no switch, but certainly looks +like this is the same issue. + +> +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at +> +1 +> +> +I have tried to write sysfs node manually, got same errors. +When stepping through by hand, which sysfs write triggers the crash above? + +Not sure it's related, but I've just sent out a fix to the +target register handling in QEMU. +20220808122051.14822-1-Jonathan.Cameron@huawei.com +/T/#m47ff985412ce44559e6b04d677c302f8cd371330">https://lore.kernel.org/linux-cxl/ +20220808122051.14822-1-Jonathan.Cameron@huawei.com +/T/#m47ff985412ce44559e6b04d677c302f8cd371330 +I did have one instance last week of triggering what looked to be a race +condition but +the stack trace doesn't looks related to what you've hit. + +It will probably be a few days before I have time to take a look at replicating +what you have seen. + +If you have time, try using the kernel.org cxl/pending branch as there are +a few additional fixes on there since you sent this email. Optimistic to hope +this is covered by one of those, but at least it will mean we are trying to +replicate +on same branch. + +Jonathan + + +> +> +Hope I can get some helps here. 
+> +> +Bob + +Hi Jonathan + +Thanks for your reply! + +On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +<Jonathan.Cameron@huawei.com> wrote: +> +> +Probably not related to your problem, but there is a disconnect in QEMU / +> +kernel assumptionsaround the presence of an HDM decoder when a HB only +> +has a single root port. Spec allows it to be provided or not as an +> +implementation choice. +> +Kernel assumes it isn't provide. Qemu assumes it is. +> +> +The temporary solution is to throw in a second root port on the HB and not +> +connect anything to it. Longer term I may special case this so that the +> +particular +> +decoder defaults to pass through settings in QEMU if there is only one root +> +port. +> +You are right! After adding an extra HB in qemu, I can create a x1 +region successfully. +But have some errors in Nvdimm: + +[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node 0 +[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node 0 +[ 74.927470] nd_region region0: nmem0: is disabled, failing probe + +And x4 region still failed with same errors, using latest cxl/preview +branch don't work. +I have picked "Two CXL emulation fixes" patches in qemu, still not working. + +Bob + +On Tue, 9 Aug 2022 21:07:06 +0800 +Bobo WL <lmw.bobo@gmail.com> wrote: + +> +Hi Jonathan +> +> +Thanks for your reply! +> +> +On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +<Jonathan.Cameron@huawei.com> wrote: +> +> +> +> Probably not related to your problem, but there is a disconnect in QEMU / +> +> kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> has a single root port. Spec allows it to be provided or not as an +> +> implementation choice. +> +> Kernel assumes it isn't provide. Qemu assumes it is. +> +> +> +> The temporary solution is to throw in a second root port on the HB and not +> +> connect anything to it. Longer term I may special case this so that the +> +> particular +> +> decoder defaults to pass through settings in QEMU if there is only one root +> +> port. +> +> +> +> +You are right! After adding an extra HB in qemu, I can create a x1 +> +region successfully. +> +But have some errors in Nvdimm: +> +> +[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node > 0 +> +[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node > 0 +> +[ 74.927470] nd_region region0: nmem0: is disabled, failing probe +Ah. I've seen this one, but not chased it down yet. Was on my todo list to +chase +down. Once I reach this state I can verify the HDM Decode is correct which is +what +I've been using to test (Which wasn't true until earlier this week). +I'm currently testing via devmem, more for historical reasons than because it +makes +that much sense anymore. + +> +> +And x4 region still failed with same errors, using latest cxl/preview +> +branch don't work. +> +I have picked "Two CXL emulation fixes" patches in qemu, still not working. +> +> +Bob + +On Tue, 9 Aug 2022 17:08:25 +0100 +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: + +> +On Tue, 9 Aug 2022 21:07:06 +0800 +> +Bobo WL <lmw.bobo@gmail.com> wrote: +> +> +> Hi Jonathan +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> <Jonathan.Cameron@huawei.com> wrote: +> +> > +> +> > Probably not related to your problem, but there is a disconnect in QEMU / +> +> > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > has a single root port. 
Spec allows it to be provided or not as an +> +> > implementation choice. +> +> > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > +> +> > The temporary solution is to throw in a second root port on the HB and not +> +> > connect anything to it. Longer term I may special case this so that the +> +> > particular +> +> > decoder defaults to pass through settings in QEMU if there is only one +> +> > root port. +> +> > +> +> +> +> You are right! After adding an extra HB in qemu, I can create a x1 +> +> region successfully. +> +> But have some errors in Nvdimm: +> +> +> +> [ 74.925838] Unknown online node for memory at 0x10000000000, assuming +> +> node 0 +> +> [ 74.925846] Unknown target node for memory at 0x10000000000, assuming +> +> node 0 +> +> [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> +Ah. I've seen this one, but not chased it down yet. Was on my todo list to +> +chase +> +down. Once I reach this state I can verify the HDM Decode is correct which is +> +what +> +I've been using to test (Which wasn't true until earlier this week). +> +I'm currently testing via devmem, more for historical reasons than because it +> +makes +> +that much sense anymore. +*embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +I'd forgotten that was still on the todo list. I don't think it will +be particularly hard to do and will take a look in next few days. + +Very very indirectly this error is causing a driver probe fail that means that +we hit a code path that has a rather odd looking check on NDD_LABELING. +Should not have gotten near that path though - hence the problem is actually +when we call cxl_pmem_get_config_data() and it returns an error because +we haven't fully connected up the command in QEMU. + +Jonathan + + +> +> +> +> +> And x4 region still failed with same errors, using latest cxl/preview +> +> branch don't work. +> +> I have picked "Two CXL emulation fixes" patches in qemu, still not working. +> +> +> +> Bob + +On Thu, 11 Aug 2022 18:08:57 +0100 +Jonathan Cameron via <qemu-devel@nongnu.org> wrote: + +> +On Tue, 9 Aug 2022 17:08:25 +0100 +> +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> +> On Tue, 9 Aug 2022 21:07:06 +0800 +> +> Bobo WL <lmw.bobo@gmail.com> wrote: +> +> +> +> > Hi Jonathan +> +> > +> +> > Thanks for your reply! +> +> > +> +> > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > <Jonathan.Cameron@huawei.com> wrote: +> +> > > +> +> > > Probably not related to your problem, but there is a disconnect in QEMU +> +> > > / +> +> > > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > > has a single root port. Spec allows it to be provided or not as an +> +> > > implementation choice. +> +> > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > +> +> > > The temporary solution is to throw in a second root port on the HB and +> +> > > not +> +> > > connect anything to it. Longer term I may special case this so that +> +> > > the particular +> +> > > decoder defaults to pass through settings in QEMU if there is only one +> +> > > root port. +> +> > > +> +> > +> +> > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > region successfully. 
+> +> > But have some errors in Nvdimm: +> +> > +> +> > [ 74.925838] Unknown online node for memory at 0x10000000000, assuming +> +> > node 0 +> +> > [ 74.925846] Unknown target node for memory at 0x10000000000, assuming +> +> > node 0 +> +> > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> +> +> Ah. I've seen this one, but not chased it down yet. Was on my todo list to +> +> chase +> +> down. Once I reach this state I can verify the HDM Decode is correct which +> +> is what +> +> I've been using to test (Which wasn't true until earlier this week). +> +> I'm currently testing via devmem, more for historical reasons than because +> +> it makes +> +> that much sense anymore. +> +> +*embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +I'd forgotten that was still on the todo list. I don't think it will +> +be particularly hard to do and will take a look in next few days. +> +> +Very very indirectly this error is causing a driver probe fail that means that +> +we hit a code path that has a rather odd looking check on NDD_LABELING. +> +Should not have gotten near that path though - hence the problem is actually +> +when we call cxl_pmem_get_config_data() and it returns an error because +> +we haven't fully connected up the command in QEMU. +So a least one bug in QEMU. We were not supporting variable length payloads on +mailbox +inputs (but were on outputs). That hasn't mattered until we get to LSA writes. +We just need to relax condition on the supplied length. + +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +index c352a935c4..fdda9529fe 100644 +--- a/hw/cxl/cxl-mailbox-utils.c ++++ b/hw/cxl/cxl-mailbox-utils.c +@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) + cxl_cmd = &cxl_cmd_set[set][cmd]; + h = cxl_cmd->handler; + if (h) { +- if (len == cxl_cmd->in) { ++ if (len == cxl_cmd->in || !cxl_cmd->in) { + cxl_cmd->payload = cxl_dstate->mbox_reg_state + + A_CXL_DEV_CMD_PAYLOAD; + ret = (*h)(cxl_cmd, cxl_dstate, &len); + + +This lets the nvdimm/region probe fine, but I'm getting some issues with +namespace capacity so I'll look at what is causing that next. +Unfortunately I'm not that familiar with the driver/nvdimm side of things +so it's take a while to figure out what kicks off what! + +Jonathan + +> +> +Jonathan +> +> +> +> +> +> > +> +> > And x4 region still failed with same errors, using latest cxl/preview +> +> > branch don't work. +> +> > I have picked "Two CXL emulation fixes" patches in qemu, still not +> +> > working. +> +> > +> +> > Bob +> +> + +Jonathan Cameron wrote: +> +On Thu, 11 Aug 2022 18:08:57 +0100 +> +Jonathan Cameron via <qemu-devel@nongnu.org> wrote: +> +> +> On Tue, 9 Aug 2022 17:08:25 +0100 +> +> Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> +> +> > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > Bobo WL <lmw.bobo@gmail.com> wrote: +> +> > +> +> > > Hi Jonathan +> +> > > +> +> > > Thanks for your reply! +> +> > > +> +> > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > <Jonathan.Cameron@huawei.com> wrote: +> +> > > > +> +> > > > Probably not related to your problem, but there is a disconnect in +> +> > > > QEMU / +> +> > > > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > implementation choice. +> +> > > > Kernel assumes it isn't provide. Qemu assumes it is. 
+> +> > > > +> +> > > > The temporary solution is to throw in a second root port on the HB +> +> > > > and not +> +> > > > connect anything to it. Longer term I may special case this so that +> +> > > > the particular +> +> > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > one root port. +> +> > > > +> +> > > +> +> > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > region successfully. +> +> > > But have some errors in Nvdimm: +> +> > > +> +> > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > +> +> > Ah. I've seen this one, but not chased it down yet. Was on my todo list +> +> > to chase +> +> > down. Once I reach this state I can verify the HDM Decode is correct +> +> > which is what +> +> > I've been using to test (Which wasn't true until earlier this week). +> +> > I'm currently testing via devmem, more for historical reasons than +> +> > because it makes +> +> > that much sense anymore. +> +> +> +> *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> I'd forgotten that was still on the todo list. I don't think it will +> +> be particularly hard to do and will take a look in next few days. +> +> +> +> Very very indirectly this error is causing a driver probe fail that means +> +> that +> +> we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> Should not have gotten near that path though - hence the problem is actually +> +> when we call cxl_pmem_get_config_data() and it returns an error because +> +> we haven't fully connected up the command in QEMU. +> +> +So a least one bug in QEMU. We were not supporting variable length payloads +> +on mailbox +> +inputs (but were on outputs). That hasn't mattered until we get to LSA +> +writes. +> +We just need to relax condition on the supplied length. +> +> +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +index c352a935c4..fdda9529fe 100644 +> +--- a/hw/cxl/cxl-mailbox-utils.c +> ++++ b/hw/cxl/cxl-mailbox-utils.c +> +@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +cxl_cmd = &cxl_cmd_set[set][cmd]; +> +h = cxl_cmd->handler; +> +if (h) { +> +- if (len == cxl_cmd->in) { +> ++ if (len == cxl_cmd->in || !cxl_cmd->in) { +> +cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +A_CXL_DEV_CMD_PAYLOAD; +> +ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> +> +This lets the nvdimm/region probe fine, but I'm getting some issues with +> +namespace capacity so I'll look at what is causing that next. +> +Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +so it's take a while to figure out what kicks off what! +The whirlwind tour is that 'struct nd_region' instances that represent a +persitent memory address range are composed of one more mappings of +'struct nvdimm' objects. The nvdimm object is driven by the dimm driver +in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking +the dimm (if locked) and interrogating the label area to look for +namespace labels. + +The label command calls are routed to the '->ndctl()' callback that was +registered when the CXL nvdimm_bus_descriptor was created. That callback +handles both 'bus' scope calls, currently none for CXL, and per nvdimm +calls. 
cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +to CXL commands. + +The 'struct nvdimm' objects that the CXL side registers have the +NDD_LABELING flag set which means that namespaces need to be explicitly +created / provisioned from region capacity. Otherwise, if +drivers/nvdimm/dimm.c does not find a namespace-label-index block then +the region reverts to label-less mode and a default namespace equal to +the size of the region is instantiated. + +If you are seeing small mismatches in namespace capacity then it may +just be the fact that by default 'ndctl create-namespace' results in an +'fsdax' mode namespace which just means that it is a block device where +1.5% of the capacity is reserved for 'struct page' metadata. You should +be able to see namespace capacity == region capacity by doing "ndctl +create-namespace -m raw", and disable DAX operation. + +Hope that helps. + +On Fri, 12 Aug 2022 09:03:02 -0700 +Dan Williams <dan.j.williams@intel.com> wrote: + +> +Jonathan Cameron wrote: +> +> On Thu, 11 Aug 2022 18:08:57 +0100 +> +> Jonathan Cameron via <qemu-devel@nongnu.org> wrote: +> +> +> +> > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> > +> +> > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > Bobo WL <lmw.bobo@gmail.com> wrote: +> +> > > +> +> > > > Hi Jonathan +> +> > > > +> +> > > > Thanks for your reply! +> +> > > > +> +> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > <Jonathan.Cameron@huawei.com> wrote: +> +> > > > > +> +> > > > > Probably not related to your problem, but there is a disconnect in +> +> > > > > QEMU / +> +> > > > > kernel assumptionsaround the presence of an HDM decoder when a HB +> +> > > > > only +> +> > > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > > implementation choice. +> +> > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > +> +> > > > > The temporary solution is to throw in a second root port on the HB +> +> > > > > and not +> +> > > > > connect anything to it. Longer term I may special case this so +> +> > > > > that the particular +> +> > > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > > one root port. +> +> > > > > +> +> > > > +> +> > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > region successfully. +> +> > > > But have some errors in Nvdimm: +> +> > > > +> +> > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > > +> +> > > +> +> > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > list to chase +> +> > > down. Once I reach this state I can verify the HDM Decode is correct +> +> > > which is what +> +> > > I've been using to test (Which wasn't true until earlier this week). +> +> > > I'm currently testing via devmem, more for historical reasons than +> +> > > because it makes +> +> > > that much sense anymore. +> +> > +> +> > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > I'd forgotten that was still on the todo list. I don't think it will +> +> > be particularly hard to do and will take a look in next few days. 
+> +> > +> +> > Very very indirectly this error is causing a driver probe fail that means +> +> > that +> +> > we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> > Should not have gotten near that path though - hence the problem is +> +> > actually +> +> > when we call cxl_pmem_get_config_data() and it returns an error because +> +> > we haven't fully connected up the command in QEMU. +> +> +> +> So a least one bug in QEMU. We were not supporting variable length payloads +> +> on mailbox +> +> inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> writes. +> +> We just need to relax condition on the supplied length. +> +> +> +> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> index c352a935c4..fdda9529fe 100644 +> +> --- a/hw/cxl/cxl-mailbox-utils.c +> +> +++ b/hw/cxl/cxl-mailbox-utils.c +> +> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> h = cxl_cmd->handler; +> +> if (h) { +> +> - if (len == cxl_cmd->in) { +> +> + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +> cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +> A_CXL_DEV_CMD_PAYLOAD; +> +> ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> +> +> +> +> This lets the nvdimm/region probe fine, but I'm getting some issues with +> +> namespace capacity so I'll look at what is causing that next. +> +> Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +> so it's take a while to figure out what kicks off what! +> +> +The whirlwind tour is that 'struct nd_region' instances that represent a +> +persitent memory address range are composed of one more mappings of +> +'struct nvdimm' objects. The nvdimm object is driven by the dimm driver +> +in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking +> +the dimm (if locked) and interrogating the label area to look for +> +namespace labels. +> +> +The label command calls are routed to the '->ndctl()' callback that was +> +registered when the CXL nvdimm_bus_descriptor was created. That callback +> +handles both 'bus' scope calls, currently none for CXL, and per nvdimm +> +calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +> +to CXL commands. +> +> +The 'struct nvdimm' objects that the CXL side registers have the +> +NDD_LABELING flag set which means that namespaces need to be explicitly +> +created / provisioned from region capacity. Otherwise, if +> +drivers/nvdimm/dimm.c does not find a namespace-label-index block then +> +the region reverts to label-less mode and a default namespace equal to +> +the size of the region is instantiated. +> +> +If you are seeing small mismatches in namespace capacity then it may +> +just be the fact that by default 'ndctl create-namespace' results in an +> +'fsdax' mode namespace which just means that it is a block device where +> +1.5% of the capacity is reserved for 'struct page' metadata. You should +> +be able to see namespace capacity == region capacity by doing "ndctl +> +create-namespace -m raw", and disable DAX operation. +Currently ndctl create-namespace crashes qemu ;) +Which isn't ideal! + +> +> +Hope that helps. +Got me looking at the right code. Thanks! 
+ +Jonathan + +On Fri, 12 Aug 2022 17:15:09 +0100 +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: + +> +On Fri, 12 Aug 2022 09:03:02 -0700 +> +Dan Williams <dan.j.williams@intel.com> wrote: +> +> +> Jonathan Cameron wrote: +> +> > On Thu, 11 Aug 2022 18:08:57 +0100 +> +> > Jonathan Cameron via <qemu-devel@nongnu.org> wrote: +> +> > +> +> > > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > > Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> > > +> +> > > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > > Bobo WL <lmw.bobo@gmail.com> wrote: +> +> > > > +> +> > > > > Hi Jonathan +> +> > > > > +> +> > > > > Thanks for your reply! +> +> > > > > +> +> > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > > <Jonathan.Cameron@huawei.com> wrote: +> +> > > > > > +> +> > > > > > Probably not related to your problem, but there is a disconnect +> +> > > > > > in QEMU / +> +> > > > > > kernel assumptionsaround the presence of an HDM decoder when a HB +> +> > > > > > only +> +> > > > > > has a single root port. Spec allows it to be provided or not as +> +> > > > > > an implementation choice. +> +> > > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > > +> +> > > > > > The temporary solution is to throw in a second root port on the +> +> > > > > > HB and not +> +> > > > > > connect anything to it. Longer term I may special case this so +> +> > > > > > that the particular +> +> > > > > > decoder defaults to pass through settings in QEMU if there is +> +> > > > > > only one root port. +> +> > > > > > +> +> > > > > +> +> > > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > > region successfully. +> +> > > > > But have some errors in Nvdimm: +> +> > > > > +> +> > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > > assuming node 0 +> +> > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > > assuming node 0 +> +> > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > > > +> +> > > > +> +> > > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > > list to chase +> +> > > > down. Once I reach this state I can verify the HDM Decode is correct +> +> > > > which is what +> +> > > > I've been using to test (Which wasn't true until earlier this week). +> +> > > > I'm currently testing via devmem, more for historical reasons than +> +> > > > because it makes +> +> > > > that much sense anymore. +> +> > > +> +> > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > > I'd forgotten that was still on the todo list. I don't think it will +> +> > > be particularly hard to do and will take a look in next few days. +> +> > > +> +> > > Very very indirectly this error is causing a driver probe fail that +> +> > > means that +> +> > > we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> > > Should not have gotten near that path though - hence the problem is +> +> > > actually +> +> > > when we call cxl_pmem_get_config_data() and it returns an error because +> +> > > we haven't fully connected up the command in QEMU. +> +> > +> +> > So a least one bug in QEMU. We were not supporting variable length +> +> > payloads on mailbox +> +> > inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> > writes. +> +> > We just need to relax condition on the supplied length. 
+> +> > +> +> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> > index c352a935c4..fdda9529fe 100644 +> +> > --- a/hw/cxl/cxl-mailbox-utils.c +> +> > +++ b/hw/cxl/cxl-mailbox-utils.c +> +> > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> > cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> > h = cxl_cmd->handler; +> +> > if (h) { +> +> > - if (len == cxl_cmd->in) { +> +> > + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +> > cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +> > A_CXL_DEV_CMD_PAYLOAD; +> +> > ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> > +> +> > +> +> > This lets the nvdimm/region probe fine, but I'm getting some issues with +> +> > namespace capacity so I'll look at what is causing that next. +> +> > Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +> > so it's take a while to figure out what kicks off what! +> +> +> +> The whirlwind tour is that 'struct nd_region' instances that represent a +> +> persitent memory address range are composed of one more mappings of +> +> 'struct nvdimm' objects. The nvdimm object is driven by the dimm driver +> +> in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking +> +> the dimm (if locked) and interrogating the label area to look for +> +> namespace labels. +> +> +> +> The label command calls are routed to the '->ndctl()' callback that was +> +> registered when the CXL nvdimm_bus_descriptor was created. That callback +> +> handles both 'bus' scope calls, currently none for CXL, and per nvdimm +> +> calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +> +> to CXL commands. +> +> +> +> The 'struct nvdimm' objects that the CXL side registers have the +> +> NDD_LABELING flag set which means that namespaces need to be explicitly +> +> created / provisioned from region capacity. Otherwise, if +> +> drivers/nvdimm/dimm.c does not find a namespace-label-index block then +> +> the region reverts to label-less mode and a default namespace equal to +> +> the size of the region is instantiated. +> +> +> +> If you are seeing small mismatches in namespace capacity then it may +> +> just be the fact that by default 'ndctl create-namespace' results in an +> +> 'fsdax' mode namespace which just means that it is a block device where +> +> 1.5% of the capacity is reserved for 'struct page' metadata. You should +> +> be able to see namespace capacity == region capacity by doing "ndctl +> +> create-namespace -m raw", and disable DAX operation. +> +> +Currently ndctl create-namespace crashes qemu ;) +> +Which isn't ideal! +> +Found a cause for this one. Mailbox payload may be as small as 256 bytes. +We have code in kernel sanity checking that output payload fits in the +mailbox, but nothing on the input payload. Symptom is that we write just +off the end whatever size the payload is. Note doing this shouldn't crash +qemu - so I need to fix a range check somewhere. + +I think this is because cxl_pmem_get_config_size() returns the mailbox +payload size as being the available LSA size, forgetting to remove the +size of the headers on the set_lsa side of things. +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110 +I've hacked the max_payload to be -8 + +Now we still don't succeed in creating the namespace, but bonus is it doesn't +crash any more. + + +Jonathan + + + +> +> +> +> Hope that helps. +> +Got me looking at the right code. Thanks! 
+> +> +Jonathan +> +> + +On Mon, 15 Aug 2022 15:18:09 +0100 +Jonathan Cameron via <qemu-devel@nongnu.org> wrote: + +> +On Fri, 12 Aug 2022 17:15:09 +0100 +> +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> +> On Fri, 12 Aug 2022 09:03:02 -0700 +> +> Dan Williams <dan.j.williams@intel.com> wrote: +> +> +> +> > Jonathan Cameron wrote: +> +> > > On Thu, 11 Aug 2022 18:08:57 +0100 +> +> > > Jonathan Cameron via <qemu-devel@nongnu.org> wrote: +> +> > > +> +> > > > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > > > Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> > > > +> +> > > > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > > > Bobo WL <lmw.bobo@gmail.com> wrote: +> +> > > > > +> +> > > > > > Hi Jonathan +> +> > > > > > +> +> > > > > > Thanks for your reply! +> +> > > > > > +> +> > > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > > > <Jonathan.Cameron@huawei.com> wrote: +> +> > > > > > > +> +> > > > > > > Probably not related to your problem, but there is a disconnect +> +> > > > > > > in QEMU / +> +> > > > > > > kernel assumptionsaround the presence of an HDM decoder when a +> +> > > > > > > HB only +> +> > > > > > > has a single root port. Spec allows it to be provided or not as +> +> > > > > > > an implementation choice. +> +> > > > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > > > +> +> > > > > > > The temporary solution is to throw in a second root port on the +> +> > > > > > > HB and not +> +> > > > > > > connect anything to it. Longer term I may special case this so +> +> > > > > > > that the particular +> +> > > > > > > decoder defaults to pass through settings in QEMU if there is +> +> > > > > > > only one root port. +> +> > > > > > > +> +> > > > > > +> +> > > > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > > > region successfully. +> +> > > > > > But have some errors in Nvdimm: +> +> > > > > > +> +> > > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > > > assuming node 0 +> +> > > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > > > assuming node 0 +> +> > > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing +> +> > > > > > probe +> +> > > > > +> +> > > > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > > > list to chase +> +> > > > > down. Once I reach this state I can verify the HDM Decode is +> +> > > > > correct which is what +> +> > > > > I've been using to test (Which wasn't true until earlier this +> +> > > > > week). +> +> > > > > I'm currently testing via devmem, more for historical reasons than +> +> > > > > because it makes +> +> > > > > that much sense anymore. +> +> > > > +> +> > > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > > > I'd forgotten that was still on the todo list. I don't think it will +> +> > > > be particularly hard to do and will take a look in next few days. +> +> > > > +> +> > > > Very very indirectly this error is causing a driver probe fail that +> +> > > > means that +> +> > > > we hit a code path that has a rather odd looking check on +> +> > > > NDD_LABELING. +> +> > > > Should not have gotten near that path though - hence the problem is +> +> > > > actually +> +> > > > when we call cxl_pmem_get_config_data() and it returns an error +> +> > > > because +> +> > > > we haven't fully connected up the command in QEMU. +> +> > > +> +> > > So a least one bug in QEMU. 
We were not supporting variable length +> +> > > payloads on mailbox +> +> > > inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> > > writes. +> +> > > We just need to relax condition on the supplied length. +> +> > > +> +> > > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> > > index c352a935c4..fdda9529fe 100644 +> +> > > --- a/hw/cxl/cxl-mailbox-utils.c +> +> > > +++ b/hw/cxl/cxl-mailbox-utils.c +> +> > > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> > > cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> > > h = cxl_cmd->handler; +> +> > > if (h) { +> +> > > - if (len == cxl_cmd->in) { +> +> > > + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +> > > cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +> > > A_CXL_DEV_CMD_PAYLOAD; +> +> > > ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> > > +> +> > > +> +> > > This lets the nvdimm/region probe fine, but I'm getting some issues with +> +> > > namespace capacity so I'll look at what is causing that next. +> +> > > Unfortunately I'm not that familiar with the driver/nvdimm side of +> +> > > things +> +> > > so it's take a while to figure out what kicks off what! +> +> > +> +> > The whirlwind tour is that 'struct nd_region' instances that represent a +> +> > persitent memory address range are composed of one more mappings of +> +> > 'struct nvdimm' objects. The nvdimm object is driven by the dimm driver +> +> > in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking +> +> > the dimm (if locked) and interrogating the label area to look for +> +> > namespace labels. +> +> > +> +> > The label command calls are routed to the '->ndctl()' callback that was +> +> > registered when the CXL nvdimm_bus_descriptor was created. That callback +> +> > handles both 'bus' scope calls, currently none for CXL, and per nvdimm +> +> > calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +> +> > to CXL commands. +> +> > +> +> > The 'struct nvdimm' objects that the CXL side registers have the +> +> > NDD_LABELING flag set which means that namespaces need to be explicitly +> +> > created / provisioned from region capacity. Otherwise, if +> +> > drivers/nvdimm/dimm.c does not find a namespace-label-index block then +> +> > the region reverts to label-less mode and a default namespace equal to +> +> > the size of the region is instantiated. +> +> > +> +> > If you are seeing small mismatches in namespace capacity then it may +> +> > just be the fact that by default 'ndctl create-namespace' results in an +> +> > 'fsdax' mode namespace which just means that it is a block device where +> +> > 1.5% of the capacity is reserved for 'struct page' metadata. You should +> +> > be able to see namespace capacity == region capacity by doing "ndctl +> +> > create-namespace -m raw", and disable DAX operation. +> +> +> +> Currently ndctl create-namespace crashes qemu ;) +> +> Which isn't ideal! +> +> +> +> +Found a cause for this one. Mailbox payload may be as small as 256 bytes. +> +We have code in kernel sanity checking that output payload fits in the +> +mailbox, but nothing on the input payload. Symptom is that we write just +> +off the end whatever size the payload is. Note doing this shouldn't crash +> +qemu - so I need to fix a range check somewhere. +> +> +I think this is because cxl_pmem_get_config_size() returns the mailbox +> +payload size as being the available LSA size, forgetting to remove the +> +size of the headers on the set_lsa side of things. 
+> +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110 +> +> +I've hacked the max_payload to be -8 +> +> +Now we still don't succeed in creating the namespace, but bonus is it doesn't +> +crash any more. +In the interests of defensive / correct handling from QEMU I took a +look into why it was crashing. Turns out that providing a NULL write callback +for +the memory device region (that the above overlarge write was spilling into) +isn't +a safe thing to do. Needs a stub. Oops. + +On plus side we might never have noticed this was going wrong without the crash +*silver lining in every cloud* + +Fix to follow... + +Jonathan + + +> +> +> +Jonathan +> +> +> +> +> > +> +> > Hope that helps. +> +> Got me looking at the right code. Thanks! +> +> +> +> Jonathan +> +> +> +> +> +> + +On Mon, 15 Aug 2022 at 15:55, Jonathan Cameron via <qemu-arm@nongnu.org> wrote: +> +In the interests of defensive / correct handling from QEMU I took a +> +look into why it was crashing. Turns out that providing a NULL write +> +callback for +> +the memory device region (that the above overlarge write was spilling into) +> +isn't +> +a safe thing to do. Needs a stub. Oops. +Yeah. We've talked before about adding an assert so that that kind of +"missing function" bug is caught at device creation rather than only +if the guest tries to access the device, but we never quite got around +to it... + +-- PMM + +On Fri, 12 Aug 2022 16:44:03 +0100 +Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: + +> +On Thu, 11 Aug 2022 18:08:57 +0100 +> +Jonathan Cameron via <qemu-devel@nongnu.org> wrote: +> +> +> On Tue, 9 Aug 2022 17:08:25 +0100 +> +> Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: +> +> +> +> > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > Bobo WL <lmw.bobo@gmail.com> wrote: +> +> > +> +> > > Hi Jonathan +> +> > > +> +> > > Thanks for your reply! +> +> > > +> +> > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > <Jonathan.Cameron@huawei.com> wrote: +> +> > > > +> +> > > > Probably not related to your problem, but there is a disconnect in +> +> > > > QEMU / +> +> > > > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > implementation choice. +> +> > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > +> +> > > > The temporary solution is to throw in a second root port on the HB +> +> > > > and not +> +> > > > connect anything to it. Longer term I may special case this so that +> +> > > > the particular +> +> > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > one root port. +> +> > > > +> +> > > +> +> > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > region successfully. +> +> > > But have some errors in Nvdimm: +> +> > > +> +> > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > +> +> > +> +> > Ah. I've seen this one, but not chased it down yet. Was on my todo list +> +> > to chase +> +> > down. Once I reach this state I can verify the HDM Decode is correct +> +> > which is what +> +> > I've been using to test (Which wasn't true until earlier this week). 
+
+On Fri, 12 Aug 2022 16:44:03 +0100
+Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+
+> On Thu, 11 Aug 2022 18:08:57 +0100
+> Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
+>
+> > On Tue, 9 Aug 2022 17:08:25 +0100
+> > Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+> >
+> > > On Tue, 9 Aug 2022 21:07:06 +0800
+> > > Bobo WL <lmw.bobo@gmail.com> wrote:
+> > >
+> > > > Hi Jonathan
+> > > >
+> > > > Thanks for your reply!
+> > > >
+> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron
+> > > > <Jonathan.Cameron@huawei.com> wrote:
+> > > > >
+> > > > > Probably not related to your problem, but there is a disconnect
+> > > > > in QEMU / kernel assumptions around the presence of an HDM
+> > > > > decoder when a HB only has a single root port. The spec allows it
+> > > > > to be provided or not as an implementation choice.
+> > > > > The kernel assumes it isn't provided. QEMU assumes it is.
+> > > > >
+> > > > > The temporary solution is to throw in a second root port on the
+> > > > > HB and not connect anything to it. Longer term I may special-case
+> > > > > this so that the particular decoder defaults to pass-through
+> > > > > settings in QEMU if there is only one root port.
+> > > >
+> > > > You are right! After adding an extra HB in qemu, I can create an x1
+> > > > region successfully.
+> > > > But have some errors in Nvdimm:
+> > > >
+> > > > [   74.925838] Unknown online node for memory at 0x10000000000,
+> > > > assuming node 0
+> > > > [   74.925846] Unknown target node for memory at 0x10000000000,
+> > > > assuming node 0
+> > > > [   74.927470] nd_region region0: nmem0: is disabled, failing probe
+> > >
+> > > Ah. I've seen this one, but not chased it down yet. Was on my todo
+> > > list to chase down. Once I reach this state I can verify the HDM
+> > > decode is correct, which is what I've been using to test (which
+> > > wasn't true until earlier this week).
+> > > I'm currently testing via devmem, more for historical reasons than
+> > > because it makes that much sense anymore.
+> >
+> > *embarrassed cough*. We haven't fully hooked the LSA up in qemu yet.
+> > I'd forgotten that was still on the todo list. I don't think it will
+> > be particularly hard to do and will take a look in the next few days.
+> >
+> > Very very indirectly this error is causing a driver probe fail that
+> > means we hit a code path that has a rather odd looking check on
+> > NDD_LABELING. Should not have gotten near that path though - hence the
+> > problem is actually when we call cxl_pmem_get_config_data() and it
+> > returns an error because we haven't fully connected up the command in
+> > QEMU.
+>
+> So at least one bug in QEMU. We were not supporting variable length
+> payloads on mailbox inputs (but were on outputs). That hasn't mattered
+> until we get to LSA writes.
+> We just need to relax the condition on the supplied length.
+>
+> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
+> index c352a935c4..fdda9529fe 100644
+> --- a/hw/cxl/cxl-mailbox-utils.c
+> +++ b/hw/cxl/cxl-mailbox-utils.c
+> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
+>         cxl_cmd = &cxl_cmd_set[set][cmd];
+>         h = cxl_cmd->handler;
+>         if (h) {
+> -            if (len == cxl_cmd->in) {
+> +            if (len == cxl_cmd->in || !cxl_cmd->in) {
+
+Fix is wrong as we use ~0 as the placeholder for variable payload, not 0.
+
+With that fixed we hit new fun paths - after some errors we get the
+worrying splat below - not totally sure, but it looks like a failure on an
+error cleanup path. I'll chase down the error source, but even then this
+is probably triggerable by a hardware problem or similar. Some bonus
+prints in here from me chasing error paths, but it's otherwise just
+cxl/next + the fix I posted earlier today.
+
+[ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0)
+[ 69.920108] nd_region_probe
+[ 69.920623] ------------[ cut here ]------------
+[ 69.920675] refcount_t: addition on 0; use-after-free.
+[ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 +refcount_warn_saturate+0xa0/0x144 +[ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi +cxl_core +[ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399 +[ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 +[ 69.931482] Workqueue: events_unbound async_run_entry_fn +[ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 +[ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 +[ 69.936541] sp : ffff80000890b960 +[ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: 0000000000000000 +[ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: 0000000000000000 +[ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: ffff0000c5254800 +[ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: ffffffffffffffff +[ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: 0000000000000000 +[ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: 657466612d657375 +[ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : ffffa54a8f63d288 +[ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : 00000000fffff31e +[ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : ffff5ab66e5ef000 +root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : +0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 +[ 69.957098] Call trace: +[ 69.957959] refcount_warn_saturate+0xa0/0x144 +[ 69.958773] get_ndd+0x5c/0x80 +[ 69.959294] nd_region_register_namespaces+0xe4/0xe90 +[ 69.960253] nd_region_probe+0x100/0x290 +[ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 +[ 69.962087] really_probe+0x19c/0x3f0 +[ 69.962620] __driver_probe_device+0x11c/0x190 +[ 69.963258] driver_probe_device+0x44/0xf4 +[ 69.963773] __device_attach_driver+0xa4/0x140 +[ 69.964471] bus_for_each_drv+0x84/0xe0 +[ 69.965068] __device_attach+0xb0/0x1f0 +[ 69.966101] device_initial_probe+0x20/0x30 +[ 69.967142] bus_probe_device+0xa4/0xb0 +[ 69.968104] device_add+0x3e8/0x910 +[ 69.969111] nd_async_device_register+0x24/0x74 +[ 69.969928] async_run_entry_fn+0x40/0x150 +[ 69.970725] process_one_work+0x1dc/0x450 +[ 69.971796] worker_thread+0x154/0x450 +[ 69.972700] kthread+0x118/0x120 +[ 69.974141] ret_from_fork+0x10/0x20 +[ 69.975141] ---[ end trace 0000000000000000 ]--- +[ 70.117887] Into nd_namespace_pmem_set_resource() + +> +cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +A_CXL_DEV_CMD_PAYLOAD; +> +ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> +> +This lets the nvdimm/region probe fine, but I'm getting some issues with +> +namespace capacity so I'll look at what is causing that next. +> +Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +so it's take a while to figure out what kicks off what! +> +> +Jonathan +> +> +> +> +> Jonathan +> +> +> +> +> +> > +> +> > > +> +> > > And x4 region still failed with same errors, using latest cxl/preview +> +> > > branch don't work. +> +> > > I have picked "Two CXL emulation fixes" patches in qemu, still not +> +> > > working. 
+> > > >
+> > > > Bob
+
+On Mon, 15 Aug 2022 18:04:44 +0100
+Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+
+> [snip - earlier thread context quoted in full above]
+> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
+> > [snip]
+> > -            if (len == cxl_cmd->in) {
+> > +            if (len == cxl_cmd->in || !cxl_cmd->in) {
+>
+> Fix is wrong as we use ~0 as the placeholder for variable payload, not 0.
+
+Cause of the error is a failure in GET_LSA.
+The reason: the payload length is wrong in QEMU, but that was hidden
+previously by my wrong fix here. Probably still a good idea to inject an
+error in GET_LSA and chase down the refcount issue.
+
+diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
+index fdda9529fe..e8565fbd6e 100644
+--- a/hw/cxl/cxl-mailbox-utils.c
++++ b/hw/cxl/cxl-mailbox-utils.c
+@@ -489,7 +489,7 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
+         cmd_identify_memory_device, 0, 0 },
+     [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
+         cmd_ccls_get_partition_info, 0, 0 },
+-    [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 },
++    [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
+     [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
+         ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
+     [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
+@@ -510,12 +510,13 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
+     cxl_cmd = &cxl_cmd_set[set][cmd];
+     h = cxl_cmd->handler;
+     if (h) {
+-        if (len == cxl_cmd->in || !cxl_cmd->in) {
++        if (len == cxl_cmd->in || cxl_cmd->in == ~0) {
+             cxl_cmd->payload = cxl_dstate->mbox_reg_state +
+                 A_CXL_DEV_CMD_PAYLOAD;
+
+And woot, we get a namespace in the LSA :)
+
+I'll post QEMU fixes in the next day or two. The kernel side now seems
+more or less fine, albeit with a suspicious refcount underflow.
+
+> With that fixed we hit new fun paths - after some errors we get the
+> worrying splat below - not totally sure, but it looks like a failure on
+> an error cleanup path. I'll chase down the error source, but even then
+> this is probably triggerable by a hardware problem or similar. Some
+> bonus prints in here from me chasing error paths, but it's otherwise
+> just cxl/next + the fix I posted earlier today.
+>
+> [ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0)
+> [ 69.920108] nd_region_probe
+> [ 69.920623] ------------[ cut here ]------------
+> [ 69.920675] refcount_t: addition on 0; use-after-free.
+> [ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25
+> refcount_warn_saturate+0xa0/0x144
+> [snip - remainder of the splat and quoted context, identical to the above]
+> [ 70.117887] Into nd_namespace_pmem_set_resource()
+
+Jonathan Cameron wrote:
+
+> [snip]
+> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
+> > [snip]
+> > -            if (len == cxl_cmd->in) {
+> > +            if (len == cxl_cmd->in || !cxl_cmd->in) {
+>
+> Fix is wrong as we use ~0 as the placeholder for variable payload, not 0.
+>
+> With that fixed we hit new fun paths - after some errors we get the
+> worrying splat below - not totally sure, but it looks like a failure on
+> an error cleanup path. I'll chase down the error source, but even then
+> this is probably triggerable by a hardware problem or similar. Some
+> bonus prints in here from me chasing error paths, but it's otherwise
+> just cxl/next + the fix I posted earlier today.
+
+One of the scenarios that I cannot rule out is nvdimm_probe() racing
+nd_region_probe(), but given all the work it takes to create a region I
+suspect all the nvdimm_probe() work to have completed...
+
+It is at least one potentially wrong hypothesis that needs to be chased
+down.
+
+> [snip - WARN splat quoted in full above]
+
+On Mon, 15 Aug 2022 15:55:15 -0700
+Dan Williams <dan.j.williams@intel.com> wrote:
+
+> [snip]
+> One of the scenarios that I cannot rule out is nvdimm_probe() racing
+> nd_region_probe(), but given all the work it takes to create a region I
+> suspect all the nvdimm_probe() work to have completed...
+>
+> It is at least one potentially wrong hypothesis that needs to be chased
+> down.
+
+Maybe there should be a special award for the non-intuitive
+ndctl create-namespace command (it modifies an existing namespace and
+might create a different, empty one...). I'm sure there is some
+interesting history behind that one :)
+
+Upshot is I just threw a filesystem on fsdax and wrote some text files on
+it to allow easy grepping. The right data ends up in the memory and a
+plausible namespace description is stored in the LSA.
+
+So to some degree at least it's 'working' on an 8-way direct-connected
+set of emulated devices.
+
+One snag is that serial number support isn't yet upstream in QEMU.
+(I have had it in my tree for a while but not posted it yet because of
+ the QEMU feature freeze.)
+https://gitlab.com/jic23/qemu/-/commit/144c783ea8a5fbe169f46ea1ba92940157f42733
+That's needed for meaningful cookie generation. Otherwise you can build
+the namespace once, but it won't work on the next probe as the cookie is
+0 and you hit some error paths.
+
+Maybe sensible to add a sanity check and fail namespace creation if the
+cookie is 0? (Silly side question, but is there a theoretical risk of
+a serial number / other data combination leading to a fletcher64()
+checksum that happens to be 0 - that would give a very odd bug report!)
+
+So to make it work the following is needed:
+
+1) The kernel fix for the mailbox buffer overflow.
+2) QEMU fix for the size of the arguments for GET_LSA.
+3) QEMU fix to allow variable-size input arguments (for SET_LSA).
+4) The serial number patch above, plus command lines to QEMU to set
+   appropriate serial numbers.
+
+I'll send out the QEMU fixes shortly and post the serial number patch,
+though that almost certainly won't go in until the next QEMU development
+cycle starts in a few weeks.
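+A minimal sketch of the cookie sanity check proposed above (kernel-style,
+but the function name and error code are illustrative, not an existing
+API):
+
+    #include <linux/types.h>
+    #include <linux/errno.h>
+
+    /*
+     * A cookie of 0 almost always means the interleave-set metadata
+     * (e.g. the DIMM serial numbers fed to fletcher64()) was never
+     * populated, so a label written now would fail validation on the
+     * next probe anyway. Failing creation early gives a clearer error.
+     */
+    static int validate_iset_cookie(u64 cookie)
+    {
+        if (cookie == 0)
+            return -ENXIO;
+        return 0;
+    }
+
+The cost is refusing the (astronomically unlikely) case of real metadata
+whose fletcher64() checksum genuinely sums to 0 - that configuration
+would be rejected up front rather than producing the odd bug report.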
+
+Next up, run through the same tests on some other topologies.
+
+Jonathan
+
+> > [snip - WARN splat quoted in full above]
+
+Bobo WL wrote:
+> Hi list
+>
+> I want to test cxl functions in arm64, and found some problems I can't
+> figure out.
+>
+> My test environment:
+>
+> 1. build latest bios from https://github.com/tianocore/edk2.git master
+>    branch (cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2)
+> 2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git
+>    master branch (846dcf0ba4eff824c295f06550b8673ff3f31314).
With cxl arm +> +support patch: +> +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +> +3. build Linux kernel from +> +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git +preview +> +branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) +> +4. build latest ndctl tools from +https://github.com/pmem/ndctl +> +create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) +> +> +And my qemu test commands: +> +sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ +> +-cpu max -smp 8 -nographic -no-reboot \ +> +-kernel $KERNEL -bios $BIOS_BIN \ +> +-drive if=none,file=$ROOTFS,format=qcow2,id=hd \ +> +-device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 +> +nokaslr dyndbg="module cxl* +p"' \ +> +-object memory-backend-ram,size=4G,id=mem0 \ +> +-numa node,nodeid=0,cpus=0-7,memdev=mem0 \ +> +-net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ +> +-object +> +memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M +> +\ +> +-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ +> +-device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ +> +-device cxl-upstream,bus=root_port0,id=us0 \ +> +-device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ +> +-device +> +cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ +> +-device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ +> +-device +> +cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ +> +-device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ +> +-device +> +cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ +> +-device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ +> +-device +> +cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ +> +-M +> +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k +> +> +And I have got two problems. +> +1. When I want to create x1 region with command: "cxl create-region -d +> +decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer +> +reference. 
Crash log: +> +> +[ 534.697324] cxl_region region0: config state: 0 +> +[ 534.697346] cxl_region region0: probe: -6 +> +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +> +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 534.699405] Unable to handle kernel NULL pointer dereference at +> +virtual address 0000000000000000 +> +[ 534.701474] Mem abort info: +> +[ 534.701994] ESR = 0x0000000086000004 +> +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +> +[ 534.703616] SET = 0, FnV = 0 +> +[ 534.704174] EA = 0, S1PTW = 0 +> +[ 534.704803] FSC = 0x04: level 0 translation fault +> +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +> +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +> +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +> +[ 534.710301] Modules linked in: +> +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +> +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +> +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 534.719190] pc : 0x0 +> +[ 534.719928] lr : commit_store+0x118/0x2cc +> +[ 534.721007] sp : ffff80000aec3c30 +> +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: +> +ffff0000c0c06b30 +> +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: +> +ffff0000c0a29400 +> +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: +> +ffff0000c0c06800 +> +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: +> +0000000000000000 +> +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: +> +0000ffffd41fe838 +> +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: +> +0000000000000000 +> +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : +> +0000000000000000 +> +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : +> +ffff0000c0906e80 +> +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : +> +ffff80000aec3bf0 +> +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : +> +ffff0000c155a000 +> +[ 534.738878] Call trace: +> +[ 534.739368] 0x0 +> +[ 534.739713] dev_attr_store+0x1c/0x30 +> +[ 534.740186] sysfs_kf_write+0x48/0x58 +> +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +> +[ 534.741872] new_sync_write+0xdc/0x158 +> +[ 534.742706] vfs_write+0x1ac/0x2a8 +> +[ 534.743440] ksys_write+0x68/0xf0 +> +[ 534.744328] __arm64_sys_write+0x1c/0x28 +> +[ 534.745180] invoke_syscall+0x44/0xf0 +> +[ 534.745989] el0_svc_common+0x4c/0xfc +> +[ 534.746661] do_el0_svc+0x60/0xa8 +> +[ 534.747378] el0_svc+0x2c/0x78 +> +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +> +[ 534.748919] el0t_64_sync+0x18c/0x190 +> +[ 534.749629] Code: bad PC value +> +[ 534.750169] ---[ end trace 0000000000000000 ]--- +What was the top kernel commit when you ran this test? 
What is the line +number of "commit_store+0x118"? + +> +2. When I want to create x4 region with command: "cxl create-region -d +> +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". I got below errors: +> +> +cxl region: create_region: region0: failed to set target3 to mem3 +> +cxl region: cmd_create_region: created 0 regions +> +> +And kernel log as below: +> +[ 60.536663] cxl_region region0: config state: 0 +> +[ 60.536675] cxl_region region0: probe: -6 +> +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +> +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +> +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +> +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +> +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +> +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +> +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +> +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +> +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +> +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +> +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at +> +1 +> +> +I have tried to write sysfs node manually, got same errors. +> +> +Hope I can get some helps here. +What is the output of: + + cxl list -MDTu -d decoder0.0 + +...? It might be the case that mem1 cannot be mapped by decoder0.0, or +at least not in the specified order, or that validation check is broken. + +Hi Dan, + +Thanks for your reply! + +On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> wrote: +> +> +What is the output of: +> +> +cxl list -MDTu -d decoder0.0 +> +> +...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +at least not in the specified order, or that validation check is broken. 
+Command "cxl list -MDTu -d decoder0.0" output: + +[ + { + "memdevs":[ + { + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:11:00.0" + }, + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:10:00.0" + }, + { + "memdev":"mem0", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:0f:00.0" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:12:00.0" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder0.0", + "resource":"0x10000000000", + "size":"4.00 GiB (4.29 GB)", + "pmem_capable":true, + "volatile_capable":true, + "accelmem_capable":true, + "nr_targets":1, + "targets":[ + { + "target":"ACPI0016:01", + "alias":"pci0000:0c", + "position":0, + "id":"0xc" + } + ] + } + ] + } +] + +Bobo WL wrote: +> +Hi Dan, +> +> +Thanks for your reply! +> +> +On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> wrote: +> +> +> +> What is the output of: +> +> +> +> cxl list -MDTu -d decoder0.0 +> +> +> +> ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> at least not in the specified order, or that validation check is broken. +> +> +Command "cxl list -MDTu -d decoder0.0" output: +Thanks for this, I think I know the problem, but will try some +experiments with cxl_test first. + +Did the commit_store() crash stop reproducing with latest cxl/preview +branch? + +On Tue, Aug 9, 2022 at 11:17 PM Dan Williams <dan.j.williams@intel.com> wrote: +> +> +Bobo WL wrote: +> +> Hi Dan, +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> +> +> wrote: +> +> > +> +> > What is the output of: +> +> > +> +> > cxl list -MDTu -d decoder0.0 +> +> > +> +> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > at least not in the specified order, or that validation check is broken. +> +> +> +> Command "cxl list -MDTu -d decoder0.0" output: +> +> +Thanks for this, I think I know the problem, but will try some +> +experiments with cxl_test first. +> +> +Did the commit_store() crash stop reproducing with latest cxl/preview +> +branch? +No, still hitting this bug if don't add extra HB device in qemu + +Dan Williams wrote: +> +Bobo WL wrote: +> +> Hi Dan, +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams <dan.j.williams@intel.com> +> +> wrote: +> +> > +> +> > What is the output of: +> +> > +> +> > cxl list -MDTu -d decoder0.0 +> +> > +> +> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > at least not in the specified order, or that validation check is broken. +> +> +> +> Command "cxl list -MDTu -d decoder0.0" output: +> +> +Thanks for this, I think I know the problem, but will try some +> +experiments with cxl_test first. +Hmm, so my cxl_test experiment unfortunately passed so I'm not +reproducing the failure mode. 
This is the result of creating x4 region
+with devices directly attached to a single host-bridge:
+
+# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30))
+{
+  "region":"region8",
+  "resource":"0xf1f0000000",
+  "size":"1024.00 MiB (1073.74 MB)",
+  "interleave_ways":4,
+  "interleave_granularity":256,
+  "decode_state":"commit",
+  "mappings":[
+    {
+      "position":3,
+      "memdev":"mem11",
+      "decoder":"decoder21.0"
+    },
+    {
+      "position":2,
+      "memdev":"mem9",
+      "decoder":"decoder19.0"
+    },
+    {
+      "position":1,
+      "memdev":"mem10",
+      "decoder":"decoder20.0"
+    },
+    {
+      "position":0,
+      "memdev":"mem12",
+      "decoder":"decoder22.0"
+    }
+  ]
+}
+cxl region: cmd_create_region: created 1 region
+
+> Did the commit_store() crash stop reproducing with latest cxl/preview
+> branch?
+
+I missed the answer to this question.
+
+All of these changes are now in Linus' tree; perhaps give that a try and
+post the debug log again?
+
+On Thu, 11 Aug 2022 17:46:55 -0700
+Dan Williams <dan.j.williams@intel.com> wrote:
+
+> [snip]
+
+Hi Dan,
+
+I've moved onto looking at this one.
+1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy
+that up at some stage), 1 switch, 4 downstream switch ports each with a
+type 3 device.
+
+I'm not getting a crash, but can't successfully set up a region.
+Upon adding the final target it's failing in check_last_peer() as
+pos < distance. Seems distance is 4, which makes me think it's using the
+wrong level of the hierarchy for some reason, or that the distance check
+is wrong. Wasn't a good idea to just skip that step though, as it goes
+boom - though the stack trace is not useful.
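+For intuition about the numbers involved: with interleave ways iw and
+granularity ig, consecutive ig-sized chunks of region offset rotate
+round-robin across positions, so position = (offset / ig) % iw. On one
+loose reading of the code, "distance" at a port is how many consecutive
+positions each of its targets must cover - 4 when all four endpoints sit
+behind a single switch link - and check_last_peer() verifies the previous
+endpoint in position order really sits behind the same target. A toy
+calculation (plain C, illustrative only, not kernel code):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned long ig = 256;   /* interleave granularity, bytes */
+        unsigned long iw = 4;     /* interleave ways */
+
+        for (unsigned long off = 0; off < 8 * ig; off += ig)
+            printf("offset %5lu -> position %lu\n",
+                   off, (off / ig) % iw);
+        return 0;
+    }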
+
+Jonathan
+
+On Wed, 17 Aug 2022 17:16:19 +0100
+Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+
+> [snip]
+> I'm not getting a crash, but can't successfully set up a region.
+> Upon adding the final target it's failing in check_last_peer() as
+> pos < distance. Seems distance is 4, which makes me think it's using
+> the wrong level of the hierarchy for some reason, or that the distance
+> check is wrong.
+> Wasn't a good idea to just skip that step though, as it goes boom -
+> though the stack trace is not useful.
+
+Turns out really weird corruption happens if you accidentally back two
+type3 devices with the same memory device. Who would have thought it :)
+
+That aside, ignoring the check_last_peer() failure seems to make
+everything work for this topology. I'm not seeing the crash, so my guess
+is we fixed it somewhere along the way.
+
+Now for the fun one. I've replicated the crash if we have
+
+1HB 1*RP 1SW, 4SW-DSP, 4Type3
+
+Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't
+be programmed, but the null pointer dereference isn't related to that.
+
+The bug is straightforward. Not all decoders have commit callbacks...
+Will send out a possible fix shortly.
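+The shape of fix that implies, hedged since the posted patch may differ:
+the sysfs commit path has to tolerate decoders that never registered a
+->commit() op (kernel fragment, assuming struct cxl_decoder's optional
+commit callback):
+
+    static int commit_decoder(struct cxl_decoder *cxld)
+    {
+        if (cxld->commit)
+            return cxld->commit(cxld);
+
+        /* No HDM decoder to program (e.g. pass-through): nothing to do */
+        return 0;
+    }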
+
+Jonathan
+
+On Thu, 18 Aug 2022 17:37:40 +0100
+Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
+
+> [snip]
+> Now for the fun one. I've replicated the crash if we have
+>
+> 1HB 1*RP 1SW, 4SW-DSP, 4Type3
+>
+> Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't
+> be programmed, but the null pointer dereference isn't related to that.
+>
+> The bug is straightforward. Not all decoders have commit callbacks...
+> Will send out a possible fix shortly.
+
+For completeness I'm carrying this hack because I haven't gotten my head
+around the right fix for check_last_peer() failing on this test topology.
+
+diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
+index c49d9a5f1091..275e143bd748 100644
+--- a/drivers/cxl/core/region.c
++++ b/drivers/cxl/core/region.c
+@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
+                        rc = check_last_peer(cxled, ep, cxl_rr,
+                                             distance);
+                        if (rc)
+-                               return rc;
++                               // return rc;
+                        goto out_target_set;
+                }
+                goto add_target;
+--
+
+I might find more bugs with more testing, but these are all the ones I've
+seen so far, plus those in Bobo's reports. The QEMU fixes are now
+upstream, so they will be there in the release.
+
+As a reminder, testing on QEMU has a few corners...
+
+Need a patch to add serial number ECAP support. It is on the list for
+review, but will have to wait for after the QEMU 7.1 release (which may
+be next week).
+
+QEMU still assumes the HDM decoder on the host bridge will be programmed.
+So if you want anything to work there should be at least 2 RPs below the
+HB (no need to plug anything in to one of them).
+
+I don't want to add a command-line parameter to hide the decoder in QEMU,
+and detecting that there is only one RP would require moving a bunch of
+static stuff into runtime code (I think).
+
+I still think we should make the kernel check to see if there is a
+decoder, but if not I might see how bad a hack it is to have QEMU ignore
+that decoder if not committed in this one special case (HB HDM decoder
+with only one place it can send stuff). Obviously that would be a break
+from specification, so less than ideal!
+
+Thanks,
+
+Jonathan
+
+On Fri, 19 Aug 2022 09:46:55 +0100
+Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
+
+> [snip - the hack diff and QEMU testing notes quoted in full above]
+
+I'm still carrying this hack and still haven't worked out the right fix.
+
+Suggestions welcome! If not I'll hopefully get some time on this
+towards the end of the week.
+ +Jonathan + diff --git a/results/classifier/014/none/42613410 b/results/classifier/014/none/42613410 new file mode 100644 index 00000000..c51fabbc --- /dev/null +++ b/results/classifier/014/none/42613410 @@ -0,0 +1,176 @@ +hypervisor: 0.527 +operating system: 0.515 +risc-v: 0.468 +user-level: 0.460 +TCG: 0.454 +vnc: 0.400 +KVM: 0.381 +permissions: 0.373 +peripherals: 0.359 +virtual: 0.347 +device: 0.342 +ppc: 0.334 +graphic: 0.330 +register: 0.330 +semantic: 0.327 +performance: 0.324 +architecture: 0.319 +VMM: 0.318 +mistranslation: 0.314 +debug: 0.311 +arm: 0.288 +network: 0.284 +x86: 0.284 +assembly: 0.283 +PID: 0.276 +files: 0.264 +i386: 0.230 +kernel: 0.200 +socket: 0.190 +alpha: 0.187 +boot: 0.187 + +[Qemu-devel] [PATCH, Bug 1612908] scripts: Add TCP endpoints for qom-* scripts + +From: Carl Allendorph <address@hidden> + +I've created a patch for bug #1612908. The current docs for the scripts +in the "scripts/qmp/" directory suggest that both unix sockets and +tcp endpoints can be used. The TCP endpoints don't work for most of the +scripts, with notable exception of 'qmp-shell'. This patch attempts to +refactor the process of distinguishing between unix path endpoints and +tcp endpoints to work for all of these scripts. + +Carl Allendorph (1): + scripts: Add ability for qom-* python scripts to target tcp endpoints + + scripts/qmp/qmp-shell | 22 ++-------------------- + scripts/qmp/qmp.py | 23 ++++++++++++++++++++--- + 2 files changed, 22 insertions(+), 23 deletions(-) + +-- +2.7.4 + +From: Carl Allendorph <address@hidden> + +The current code for QEMUMonitorProtocol accepts both a unix socket +endpoint as a string and a tcp endpoint as a tuple. Most of the scripts +that use this class don't massage the command line argument to generate +a tuple. This patch refactors qmp-shell slightly to reuse the existing +parsing of the "host:port" string for all the qom-* scripts. + +Signed-off-by: Carl Allendorph <address@hidden> +--- + scripts/qmp/qmp-shell | 22 ++-------------------- + scripts/qmp/qmp.py | 23 ++++++++++++++++++++--- + 2 files changed, 22 insertions(+), 23 deletions(-) + +diff --git a/scripts/qmp/qmp-shell b/scripts/qmp/qmp-shell +index 0373b24..8a2a437 100755 +--- a/scripts/qmp/qmp-shell ++++ b/scripts/qmp/qmp-shell +@@ -83,9 +83,6 @@ class QMPCompleter(list): + class QMPShellError(Exception): + pass + +-class QMPShellBadPort(QMPShellError): +- pass +- + class FuzzyJSON(ast.NodeTransformer): + '''This extension of ast.NodeTransformer filters literal "true/false/null" + values in an AST and replaces them by proper "True/False/None" values that +@@ -103,28 +100,13 @@ class FuzzyJSON(ast.NodeTransformer): + # _execute_cmd()). Let's design a better one. + class QMPShell(qmp.QEMUMonitorProtocol): + def __init__(self, address, pretty=False): +- qmp.QEMUMonitorProtocol.__init__(self, self.__get_address(address)) ++ qmp.QEMUMonitorProtocol.__init__(self, address) + self._greeting = None + self._completer = None + self._pretty = pretty + self._transmode = False + self._actions = list() + +- def __get_address(self, arg): +- """ +- Figure out if the argument is in the port:host form, if it's not it's +- probably a file path. 
+- """ +- addr = arg.split(':') +- if len(addr) == 2: +- try: +- port = int(addr[1]) +- except ValueError: +- raise QMPShellBadPort +- return ( addr[0], port ) +- # socket path +- return arg +- + def _fill_completion(self): + for cmd in self.cmd('query-commands')['return']: + self._completer.append(cmd['name']) +@@ -400,7 +382,7 @@ def main(): + + if qemu is None: + fail_cmdline() +- except QMPShellBadPort: ++ except qmp.QMPShellBadPort: + die('bad port number in command-line') + + try: +diff --git a/scripts/qmp/qmp.py b/scripts/qmp/qmp.py +index 62d3651..261ece8 100644 +--- a/scripts/qmp/qmp.py ++++ b/scripts/qmp/qmp.py +@@ -25,21 +25,23 @@ class QMPCapabilitiesError(QMPError): + class QMPTimeoutError(QMPError): + pass + ++class QMPShellBadPort(QMPError): ++ pass ++ + class QEMUMonitorProtocol: + def __init__(self, address, server=False, debug=False): + """ + Create a QEMUMonitorProtocol class. + + @param address: QEMU address, can be either a unix socket path (string) +- or a tuple in the form ( address, port ) for a TCP +- connection ++ or a TCP endpoint (string in the format "host:port") + @param server: server mode listens on the socket (bool) + @raise socket.error on socket connection errors + @note No connection is established, this is done by the connect() or + accept() methods + """ + self.__events = [] +- self.__address = address ++ self.__address = self.__get_address(address) + self._debug = debug + self.__sock = self.__get_sock() + if server: +@@ -47,6 +49,21 @@ class QEMUMonitorProtocol: + self.__sock.bind(self.__address) + self.__sock.listen(1) + ++ def __get_address(self, arg): ++ """ ++ Figure out if the argument is in the port:host form, if it's not it's ++ probably a file path. ++ """ ++ addr = arg.split(':') ++ if len(addr) == 2: ++ try: ++ port = int(addr[1]) ++ except ValueError: ++ raise QMPShellBadPort ++ return ( addr[0], port ) ++ # socket path ++ return arg ++ + def __get_sock(self): + if isinstance(self.__address, tuple): + family = socket.AF_INET +-- +2.7.4 + diff --git a/results/classifier/014/none/43643137 b/results/classifier/014/none/43643137 new file mode 100644 index 00000000..76c22330 --- /dev/null +++ b/results/classifier/014/none/43643137 @@ -0,0 +1,565 @@ +KVM: 0.794 +x86: 0.791 +performance: 0.784 +hypervisor: 0.780 +debug: 0.775 +operating system: 0.773 +register: 0.767 +virtual: 0.766 +risc-v: 0.765 +semantic: 0.764 +user-level: 0.761 +device: 0.760 +permissions: 0.755 +VMM: 0.747 +arm: 0.747 +PID: 0.742 +vnc: 0.742 +TCG: 0.737 +peripherals: 0.734 +ppc: 0.728 +assembly: 0.727 +graphic: 0.721 +network: 0.709 +alpha: 0.708 +kernel: 0.707 +architecture: 0.699 +socket: 0.674 +mistranslation: 0.665 +i386: 0.662 +boot: 0.652 +files: 0.612 + +[Qemu-devel] [BUG/RFC] INIT IPI lost when VM starts + +Hi, +We encountered a problem that when a domain starts, seabios failed to online a +vCPU. + +After investigation, we found that the reason is in kvm-kmod, KVM_APIC_INIT bit +in +vcpu->arch.apic->pending_events was overwritten by qemu, and thus an INIT IPI +sent +to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp command +to qemu +on VM start. + +In qemu, qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from kvm-kmod and +sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus pending_events is +overwritten by qemu. 
+
+I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true after
+"query-cpus", and kvm-kmod should not clear KVM_APIC_INIT unconditionally.
+And I am not sure whether it is OK for qemu to set cpu->kvm_vcpu_dirty in
+do_kvm_cpu_synchronize_state for each caller.
+
+What's your opinion?
+
+Let me clarify it more precisely. The time sequence is: qemu handles the
+"query-cpus" qmp command, and vcpu 1 (and vcpu 0) gets registers from
+kvm-kmod (qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state->
+do_kvm_cpu_synchronize_state-> kvm_arch_get_registers); then vcpu 0 (BSP)
+sends INIT-SIPI to vcpu 1 (AP). In kvm-kmod, the KVM_APIC_INIT bit in
+vcpu 1's pending_events is set.
+Then vcpu 1 continues running, and the vcpu 1 thread in qemu calls
+kvm_arch_put_registers-> kvm_put_vcpu_events, so the KVM_APIC_INIT bit in
+vcpu 1's pending_events gets cleared, i.e., lost.
+
+In kvm-kmod, besides pending_events, sipi_vector may also be overwritten,
+so I am not sure whether there are other fields/registers in danger, i.e.,
+fields that may be modified asynchronously with respect to the vcpu thread
+itself.
+
+BTW, using a sleep like the following can reliably reproduce this problem,
+if the VM is equipped with more than 2 vcpus and is started using libvirtd.
+
+diff --git a/target/i386/kvm.c b/target/i386/kvm.c
+index 55865db..5099290 100644
+--- a/target/i386/kvm.c
++++ b/target/i386/kvm.c
+@@ -2534,6 +2534,11 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
+         KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
+     }
+
++    if (CPU(cpu)->cpu_index == 1) {
++        fprintf(stderr, "vcpu 1 sleep!!!!\n");
++        sleep(10);
++    }
++
+     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
+ }
+
+On 2017/3/20 22:21, Herongguang (Stephen) wrote:
+> Hi,
+> We encountered a problem that when a domain starts, seabios failed to
+> online a vCPU.
+>
+> After investigation, we found that the reason is in kvm-kmod: the
+> KVM_APIC_INIT bit in vcpu->arch.apic->pending_events was overwritten by
+> qemu, and thus an INIT IPI sent to the AP was lost. Qemu does this since
+> libvirtd sends a "query-cpus" qmp command to qemu on VM start.
+>
+> In qemu, qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state->
+> do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from kvm-kmod
+> and sets cpu->kvm_vcpu_dirty to true, and the vcpu thread in qemu will call
+> kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus pending_events is
+> overwritten by qemu.
+> +> +I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true +> +after âquery-cpusâ, +> +and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am +> +not sure whether +> +it is OK for qemu to set cpu->kvm_vcpu_dirty in +> +do_kvm_cpu_synchronize_state in each caller. +> +> +Whatâs your opinion? +Hi Rongguang, + +sorry for the late response. + +Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear the +bit, but the result of the INIT is stored in mp_state. + +kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +it back. Maybe it should ignore events.smi.latched_init if not in SMM, +but I would like to understand the exact sequence of events. + +Thanks, + +paolo + +On 2017/4/6 0:16, Paolo Bonzini wrote: +On 20/03/2017 15:21, Herongguang (Stephen) wrote: +We encountered a problem that when a domain starts, seabios failed to +online a vCPU. + +After investigation, we found that the reason is in kvm-kmod, +KVM_APIC_INIT bit in +vcpu->arch.apic->pending_events was overwritten by qemu, and thus an +INIT IPI sent +to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp +command to qemu +on VM start. + +In qemu, qmp_query_cpus-> cpu_synchronize_state-> +kvm_cpu_synchronize_state-> +do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from +kvm-kmod and +sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus +pending_events is +overwritten by qemu. + +I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true +after âquery-cpusâ, +and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am +not sure whether +it is OK for qemu to set cpu->kvm_vcpu_dirty in +do_kvm_cpu_synchronize_state in each caller. + +Whatâs your opinion? +Hi Rongguang, + +sorry for the late response. + +Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear the +bit, but the result of the INIT is stored in mp_state. +It's dropped in KVM_SET_VCPU_EVENTS, see below. +kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +it back. Maybe it should ignore events.smi.latched_init if not in SMM, +but I would like to understand the exact sequence of events. +time0: +vcpu1: +qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +> do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to true)-> +kvm_arch_get_registers(KVM_APIC_INIT bit in vcpu->arch.apic->pending_events was not set) + +time1: +vcpu0: +send INIT-SIPI to all AP->(in vcpu 0's context)__apic_accept_irq(KVM_APIC_INIT bit +in vcpu1's arch.apic->pending_events is set) + +time2: +vcpu1: +kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is +true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten KVM_APIC_INIT bit in +vcpu->arch.apic->pending_events!) + +So it's a race between vcpu1 get/put registers with kvm/other vcpus changing +vcpu1's status/structure fields in the mean time, I am in worry of if there are +other fields may be overwritten, +sipi_vector is one. + +also see: +https://www.mail-archive.com/address@hidden/msg438675.html +Thanks, + +paolo + +. + +Hi Paolo, + +What's your opinion about this patch? We found it just before finishing patches +for the past two days. 
+ + +Thanks, +-Gonglei + + +> +-----Original Message----- +> +From: address@hidden [ +mailto:address@hidden +On +> +Behalf Of Herongguang (Stephen) +> +Sent: Thursday, April 06, 2017 9:47 AM +> +To: Paolo Bonzini; address@hidden; address@hidden; +> +address@hidden; address@hidden; address@hidden; +> +wangxin (U); Huangweidong (C) +> +Subject: Re: [BUG/RFC] INIT IPI lost when VM starts +> +> +> +> +On 2017/4/6 0:16, Paolo Bonzini wrote: +> +> +> +> On 20/03/2017 15:21, Herongguang (Stephen) wrote: +> +>> We encountered a problem that when a domain starts, seabios failed to +> +>> online a vCPU. +> +>> +> +>> After investigation, we found that the reason is in kvm-kmod, +> +>> KVM_APIC_INIT bit in +> +>> vcpu->arch.apic->pending_events was overwritten by qemu, and thus an +> +>> INIT IPI sent +> +>> to AP was lost. Qemu does this since libvirtd sends a âquery-cpusâ qmp +> +>> command to qemu +> +>> on VM start. +> +>> +> +>> In qemu, qmp_query_cpus-> cpu_synchronize_state-> +> +>> kvm_cpu_synchronize_state-> +> +>> do_kvm_cpu_synchronize_state, qemu gets registers/vcpu_events from +> +>> kvm-kmod and +> +>> sets cpu->kvm_vcpu_dirty to true, and vcpu thread in qemu will call +> +>> kvm_arch_put_registers if cpu->kvm_vcpu_dirty is true, thus +> +>> pending_events is +> +>> overwritten by qemu. +> +>> +> +>> I think there is no need for qemu to set cpu->kvm_vcpu_dirty to true +> +>> after âquery-cpusâ, +> +>> and kvm-kmod should not clear KVM_APIC_INIT unconditionally. And I am +> +>> not sure whether +> +>> it is OK for qemu to set cpu->kvm_vcpu_dirty in +> +>> do_kvm_cpu_synchronize_state in each caller. +> +>> +> +>> Whatâs your opinion? +> +> Hi Rongguang, +> +> +> +> sorry for the late response. +> +> +> +> Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear +> +the +> +> bit, but the result of the INIT is stored in mp_state. +> +> +It's dropped in KVM_SET_VCPU_EVENTS, see below. +> +> +> +> +> kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +> +> KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +> +> it back. Maybe it should ignore events.smi.latched_init if not in SMM, +> +> but I would like to understand the exact sequence of events. +> +> +time0: +> +vcpu1: +> +qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +> +> do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to +> +true)-> kvm_arch_get_registers(KVM_APIC_INIT bit in +> +vcpu->arch.apic->pending_events was not set) +> +> +time1: +> +vcpu0: +> +send INIT-SIPI to all AP->(in vcpu 0's +> +context)__apic_accept_irq(KVM_APIC_INIT bit in vcpu1's +> +arch.apic->pending_events is set) +> +> +time2: +> +vcpu1: +> +kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is +> +true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten +> +KVM_APIC_INIT bit in vcpu->arch.apic->pending_events!) +> +> +So it's a race between vcpu1 get/put registers with kvm/other vcpus changing +> +vcpu1's status/structure fields in the mean time, I am in worry of if there +> +are +> +other fields may be overwritten, +> +sipi_vector is one. +> +> +also see: +> +https://www.mail-archive.com/address@hidden/msg438675.html +> +> +> Thanks, +> +> +> +> paolo +> +> +> +> . +> +> +> + +2017-11-20 06:57+0000, Gonglei (Arei): +> +Hi Paolo, +> +> +What's your opinion about this patch? We found it just before finishing +> +patches +> +for the past two days. 
+I think your case was fixed by f4ef19108608 ("KVM: X86: Fix loss of +pending INIT due to race"), but that patch didn't fix it perfectly, so +maybe you're hitting a similar case that happens in SMM ... + +> +> -----Original Message----- +> +> From: address@hidden [ +mailto:address@hidden +On +> +> Behalf Of Herongguang (Stephen) +> +> On 2017/4/6 0:16, Paolo Bonzini wrote: +> +> > Hi Rongguang, +> +> > +> +> > sorry for the late response. +> +> > +> +> > Where exactly is KVM_APIC_INIT dropped? kvm_get_mp_state does clear +> +> the +> +> > bit, but the result of the INIT is stored in mp_state. +> +> +> +> It's dropped in KVM_SET_VCPU_EVENTS, see below. +> +> +> +> > +> +> > kvm_get_vcpu_events is called after kvm_get_mp_state; it retrieves +> +> > KVM_APIC_INIT in events.smi.latched_init and kvm_set_vcpu_events passes +> +> > it back. Maybe it should ignore events.smi.latched_init if not in SMM, +> +> > but I would like to understand the exact sequence of events. +> +> +> +> time0: +> +> vcpu1: +> +> qmp_query_cpus-> cpu_synchronize_state-> kvm_cpu_synchronize_state-> +> +> > do_kvm_cpu_synchronize_state(and set vcpu1's cpu->kvm_vcpu_dirty to +> +> true)-> kvm_arch_get_registers(KVM_APIC_INIT bit in +> +> vcpu->arch.apic->pending_events was not set) +> +> +> +> time1: +> +> vcpu0: +> +> send INIT-SIPI to all AP->(in vcpu 0's +> +> context)__apic_accept_irq(KVM_APIC_INIT bit in vcpu1's +> +> arch.apic->pending_events is set) +> +> +> +> time2: +> +> vcpu1: +> +> kvm_cpu_exec->(if cpu->kvm_vcpu_dirty is +> +> true)kvm_arch_put_registers->kvm_put_vcpu_events(overwritten +> +> KVM_APIC_INIT bit in vcpu->arch.apic->pending_events!) +> +> +> +> So it's a race between vcpu1 get/put registers with kvm/other vcpus changing +> +> vcpu1's status/structure fields in the mean time, I am in worry of if there +> +> are +> +> other fields may be overwritten, +> +> sipi_vector is one. +Fields that can be asynchronously written by other VCPUs (like SIPI, +NMI) must not be SET if other VCPUs were not paused since the last GET. +(Looking at the interface, we can currently lose pending SMI.) + +INIT is one of the restricted fields, but the API unconditionally +couples SMM with latched INIT, which means that we can lose an INIT if +the VCPU is in SMM mode -- do you see SMM in kvm_vcpu_events? + +Thanks. 
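+
+[Illustration: one way to encode the rule above on the qemu side, following
+the shape of kvm_put_vcpu_events() that is visible in the reproducer diff
+earlier in this thread: only claim the asynchronously-writable fields when
+every VCPU is paused (a reset/full sync), never on a runtime sync such as
+query-cpus. A sketch of the idea, not necessarily the exact upstream fix.]
+
+static int kvm_put_vcpu_events(X86CPU *cpu, int level)
+{
+    struct kvm_vcpu_events events = {};
+
+    /* ... fill in exception/interrupt/NMI/SMM state as usual ... */
+
+    events.flags = 0;
+    if (level >= KVM_PUT_RESET_STATE) {
+        /* All VCPUs are paused here, so no INIT/SIPI/NMI can be latched
+         * between our earlier GET and this SET. */
+        events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING |
+                        KVM_VCPUEVENT_VALID_SIPI_VECTOR;
+    }
+    /* A runtime sync (e.g. triggered by query-cpus) leaves those fields
+     * unclaimed, so a concurrently latched INIT cannot be clobbered. */
+
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
+}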
+ diff --git a/results/classifier/014/none/55367348 b/results/classifier/014/none/55367348 new file mode 100644 index 00000000..8a89ae57 --- /dev/null +++ b/results/classifier/014/none/55367348 @@ -0,0 +1,559 @@ +risc-v: 0.675 +user-level: 0.674 +mistranslation: 0.615 +permissions: 0.595 +device: 0.586 +arm: 0.573 +PID: 0.559 +semantic: 0.555 +register: 0.553 +performance: 0.546 +operating system: 0.546 +graphic: 0.532 +assembly: 0.531 +architecture: 0.530 +ppc: 0.523 +network: 0.518 +TCG: 0.517 +debug: 0.516 +virtual: 0.512 +hypervisor: 0.502 +socket: 0.501 +files: 0.490 +boot: 0.486 +VMM: 0.473 +KVM: 0.470 +peripherals: 0.466 +vnc: 0.465 +kernel: 0.441 +x86: 0.402 +alpha: 0.375 +i386: 0.370 + +[Qemu-devel] [Bug] Docs build fails at interop.rst + +https://paste.fedoraproject.org/paste/kOPx4jhtUli---TmxSLrlw +running python3-sphinx-2.0.1-1.fc31.noarch on Fedora release 31 +(Rawhide) + +uname - a +Linux iouring 5.1.0-0.rc6.git3.1.fc31.x86_64 #1 SMP Thu Apr 25 14:25:32 +UTC 2019 x86_64 x86_64 x86_64 GNU/Linux + +Reverting commmit 90edef80a0852cf8a3d2668898ee40e8970e431 +allows for the build to occur + +Regards +Aarushi Mehta + +On 5/20/19 7:30 AM, Aarushi Mehta wrote: +> +https://paste.fedoraproject.org/paste/kOPx4jhtUli---TmxSLrlw +> +running python3-sphinx-2.0.1-1.fc31.noarch on Fedora release 31 +> +(Rawhide) +> +> +uname - a +> +Linux iouring 5.1.0-0.rc6.git3.1.fc31.x86_64 #1 SMP Thu Apr 25 14:25:32 +> +UTC 2019 x86_64 x86_64 x86_64 GNU/Linux +> +> +Reverting commmit 90edef80a0852cf8a3d2668898ee40e8970e431 +> +allows for the build to occur +> +> +Regards +> +Aarushi Mehta +> +> +Ah, dang. The blocks aren't strictly conforming json, but the version I +tested this under didn't seem to care. Your version is much newer. (I +was using 1.7 as provided by Fedora 29.) + +For now, try reverting 9e5b6cb87db66dfb606604fe6cf40e5ddf1ef0e7 instead, +which should at least turn off the "warnings as errors" option, but I +don't think that reverting -n will turn off this warning. + +I'll try to get ahold of this newer version and see if I can't fix it +more appropriately. + +--js + +On 5/20/19 12:37 PM, John Snow wrote: +> +> +> +On 5/20/19 7:30 AM, Aarushi Mehta wrote: +> +> +https://paste.fedoraproject.org/paste/kOPx4jhtUli---TmxSLrlw +> +> running python3-sphinx-2.0.1-1.fc31.noarch on Fedora release 31 +> +> (Rawhide) +> +> +> +> uname - a +> +> Linux iouring 5.1.0-0.rc6.git3.1.fc31.x86_64 #1 SMP Thu Apr 25 14:25:32 +> +> UTC 2019 x86_64 x86_64 x86_64 GNU/Linux +> +> +> +> Reverting commmit 90edef80a0852cf8a3d2668898ee40e8970e431 +> +> allows for the build to occur +> +> +> +> Regards +> +> Aarushi Mehta +> +> +> +> +> +> +Ah, dang. The blocks aren't strictly conforming json, but the version I +> +tested this under didn't seem to care. Your version is much newer. (I +> +was using 1.7 as provided by Fedora 29.) +> +> +For now, try reverting 9e5b6cb87db66dfb606604fe6cf40e5ddf1ef0e7 instead, +> +which should at least turn off the "warnings as errors" option, but I +> +don't think that reverting -n will turn off this warning. +> +> +I'll try to get ahold of this newer version and see if I can't fix it +> +more appropriately. +> +> +--js +> +...Sigh, okay. + +So, I am still not actually sure what changed from pygments 2.2 and +sphinx 1.7 to pygments 2.4 and sphinx 2.0.1, but it appears as if Sphinx +by default always tries to do add a filter to the pygments lexer that +raises an error on highlighting failure, instead of the default behavior +which is to just highlight those errors in the output. 
There is no +option to Sphinx that I am aware of to retain this lexing behavior. +(Effectively, it's strict or nothing.) + +This approach, apparently, is broken in Sphinx 1.7/Pygments 2.2, so the +build works with our malformed json. + +There are a few options: + +1. Update conf.py to ignore these warnings (and all future lexing +errors), and settle for the fact that there will be no QMP highlighting +wherever we use the directionality indicators ('->', '<-'). + +2. Update bitmaps.rst to remove the directionality indicators. + +3. Update bitmaps.rst to format the QMP blocks as raw text instead of JSON. + +4. Update bitmaps.rst to remove the "json" specification from the code +block. This will cause sphinx to "guess" the formatting, and the +pygments guesser will decide it's Python3. + +This will parse well enough, but will mis-highlight 'true' and 'false' +which are not python keywords. This approach may break in the future if +the Python3 lexer is upgraded to be stricter (because '->' and '<-' are +still invalid), and leaves us at the mercy of both the guesser and the +lexer. + +I'm not actually sure what I dislike the least; I think I dislike #1 the +most. #4 gets us most of what we want but is perhaps porcelain. + +I suspect if we attempt to move more of our documentation to ReST and +Sphinx that we will need to answer for ourselves how we intend to +document QMP code flow examples. + +--js + +On Mon, May 20, 2019 at 05:25:28PM -0400, John Snow wrote: +> +> +> +On 5/20/19 12:37 PM, John Snow wrote: +> +> +> +> +> +> On 5/20/19 7:30 AM, Aarushi Mehta wrote: +> +>> +https://paste.fedoraproject.org/paste/kOPx4jhtUli---TmxSLrlw +> +>> running python3-sphinx-2.0.1-1.fc31.noarch on Fedora release 31 +> +>> (Rawhide) +> +>> +> +>> uname - a +> +>> Linux iouring 5.1.0-0.rc6.git3.1.fc31.x86_64 #1 SMP Thu Apr 25 14:25:32 +> +>> UTC 2019 x86_64 x86_64 x86_64 GNU/Linux +> +>> +> +>> Reverting commmit 90edef80a0852cf8a3d2668898ee40e8970e431 +> +>> allows for the build to occur +> +>> +> +>> Regards +> +>> Aarushi Mehta +> +>> +> +>> +> +> +> +> Ah, dang. The blocks aren't strictly conforming json, but the version I +> +> tested this under didn't seem to care. Your version is much newer. (I +> +> was using 1.7 as provided by Fedora 29.) +> +> +> +> For now, try reverting 9e5b6cb87db66dfb606604fe6cf40e5ddf1ef0e7 instead, +> +> which should at least turn off the "warnings as errors" option, but I +> +> don't think that reverting -n will turn off this warning. +> +> +> +> I'll try to get ahold of this newer version and see if I can't fix it +> +> more appropriately. +> +> +> +> --js +> +> +> +> +...Sigh, okay. +> +> +So, I am still not actually sure what changed from pygments 2.2 and +> +sphinx 1.7 to pygments 2.4 and sphinx 2.0.1, but it appears as if Sphinx +> +by default always tries to do add a filter to the pygments lexer that +> +raises an error on highlighting failure, instead of the default behavior +> +which is to just highlight those errors in the output. There is no +> +option to Sphinx that I am aware of to retain this lexing behavior. +> +(Effectively, it's strict or nothing.) +> +> +This approach, apparently, is broken in Sphinx 1.7/Pygments 2.2, so the +> +build works with our malformed json. +> +> +There are a few options: +> +> +1. Update conf.py to ignore these warnings (and all future lexing +> +errors), and settle for the fact that there will be no QMP highlighting +> +wherever we use the directionality indicators ('->', '<-'). +> +> +2. 
Update bitmaps.rst to remove the directionality indicators. +> +> +3. Update bitmaps.rst to format the QMP blocks as raw text instead of JSON. +> +> +4. Update bitmaps.rst to remove the "json" specification from the code +> +block. This will cause sphinx to "guess" the formatting, and the +> +pygments guesser will decide it's Python3. +> +> +This will parse well enough, but will mis-highlight 'true' and 'false' +> +which are not python keywords. This approach may break in the future if +> +the Python3 lexer is upgraded to be stricter (because '->' and '<-' are +> +still invalid), and leaves us at the mercy of both the guesser and the +> +lexer. +> +> +I'm not actually sure what I dislike the least; I think I dislike #1 the +> +most. #4 gets us most of what we want but is perhaps porcelain. +> +> +I suspect if we attempt to move more of our documentation to ReST and +> +Sphinx that we will need to answer for ourselves how we intend to +> +document QMP code flow examples. +Writing a custom lexer that handles "<-" and "->" was simple (see below). + +Now, is it possible to convince Sphinx to register and use a custom lexer? + +$ cat > /tmp/lexer.py <<EOF +from pygments.lexer import RegexLexer, DelegatingLexer +from pygments.lexers.data import JsonLexer +import re +from pygments.token import * + +class QMPExampleMarkersLexer(RegexLexer): + tokens = { + 'root': [ + (r' *-> *', Generic.Prompt), + (r' *<- *', Generic.Output), + ] + } + +class QMPExampleLexer(DelegatingLexer): + def __init__(self, **options): + super(QMPExampleLexer, self).__init__(JsonLexer, +QMPExampleMarkersLexer, Error, **options) +EOF +$ pygmentize -l /tmp/lexer.py:QMPExampleLexer -x -f html <<EOF + -> { + "execute": "drive-backup", + "arguments": { + "device": "drive0", + "bitmap": "bitmap0", + "target": "drive0.inc0.qcow2", + "format": "qcow2", + "sync": "incremental", + "mode": "existing" + } + } + + <- { "return": {} } +EOF +<div class="highlight"><pre><span></span><span class="gp"> -> +</span><span class="p">{</span> + <span class="nt">"execute"</span><span class="p">:</span> +<span class="s2">"drive-backup"</span><span class="p">,</span> + <span class="nt">"arguments"</span><span class="p">:</span> +<span class="p">{</span> + <span class="nt">"device"</span><span class="p">:</span> +<span class="s2">"drive0"</span><span class="p">,</span> + <span class="nt">"bitmap"</span><span class="p">:</span> +<span class="s2">"bitmap0"</span><span class="p">,</span> + <span class="nt">"target"</span><span class="p">:</span> +<span class="s2">"drive0.inc0.qcow2"</span><span class="p">,</span> + <span class="nt">"format"</span><span class="p">:</span> +<span class="s2">"qcow2"</span><span class="p">,</span> + <span class="nt">"sync"</span><span class="p">:</span> +<span class="s2">"incremental"</span><span class="p">,</span> + <span class="nt">"mode"</span><span class="p">:</span> +<span class="s2">"existing"</span> + <span class="p">}</span> + <span class="p">}</span> + +<span class="go"> <- </span><span class="p">{</span> <span +class="nt">"return"</span><span class="p">:</span> <span +class="p">{}</span> <span class="p">}</span> +</pre></div> +$ + + +-- +Eduardo + +On 5/20/19 7:04 PM, Eduardo Habkost wrote: +> +On Mon, May 20, 2019 at 05:25:28PM -0400, John Snow wrote: +> +> +> +> +> +> On 5/20/19 12:37 PM, John Snow wrote: +> +>> +> +>> +> +>> On 5/20/19 7:30 AM, Aarushi Mehta wrote: +> +>>> +https://paste.fedoraproject.org/paste/kOPx4jhtUli---TmxSLrlw +> +>>> running python3-sphinx-2.0.1-1.fc31.noarch on Fedora release 31 +> +>>> 
(Rawhide) +> +>>> +> +>>> uname - a +> +>>> Linux iouring 5.1.0-0.rc6.git3.1.fc31.x86_64 #1 SMP Thu Apr 25 14:25:32 +> +>>> UTC 2019 x86_64 x86_64 x86_64 GNU/Linux +> +>>> +> +>>> Reverting commmit 90edef80a0852cf8a3d2668898ee40e8970e431 +> +>>> allows for the build to occur +> +>>> +> +>>> Regards +> +>>> Aarushi Mehta +> +>>> +> +>>> +> +>> +> +>> Ah, dang. The blocks aren't strictly conforming json, but the version I +> +>> tested this under didn't seem to care. Your version is much newer. (I +> +>> was using 1.7 as provided by Fedora 29.) +> +>> +> +>> For now, try reverting 9e5b6cb87db66dfb606604fe6cf40e5ddf1ef0e7 instead, +> +>> which should at least turn off the "warnings as errors" option, but I +> +>> don't think that reverting -n will turn off this warning. +> +>> +> +>> I'll try to get ahold of this newer version and see if I can't fix it +> +>> more appropriately. +> +>> +> +>> --js +> +>> +> +> +> +> ...Sigh, okay. +> +> +> +> So, I am still not actually sure what changed from pygments 2.2 and +> +> sphinx 1.7 to pygments 2.4 and sphinx 2.0.1, but it appears as if Sphinx +> +> by default always tries to do add a filter to the pygments lexer that +> +> raises an error on highlighting failure, instead of the default behavior +> +> which is to just highlight those errors in the output. There is no +> +> option to Sphinx that I am aware of to retain this lexing behavior. +> +> (Effectively, it's strict or nothing.) +> +> +> +> This approach, apparently, is broken in Sphinx 1.7/Pygments 2.2, so the +> +> build works with our malformed json. +> +> +> +> There are a few options: +> +> +> +> 1. Update conf.py to ignore these warnings (and all future lexing +> +> errors), and settle for the fact that there will be no QMP highlighting +> +> wherever we use the directionality indicators ('->', '<-'). +> +> +> +> 2. Update bitmaps.rst to remove the directionality indicators. +> +> +> +> 3. Update bitmaps.rst to format the QMP blocks as raw text instead of JSON. +> +> +> +> 4. Update bitmaps.rst to remove the "json" specification from the code +> +> block. This will cause sphinx to "guess" the formatting, and the +> +> pygments guesser will decide it's Python3. +> +> +> +> This will parse well enough, but will mis-highlight 'true' and 'false' +> +> which are not python keywords. This approach may break in the future if +> +> the Python3 lexer is upgraded to be stricter (because '->' and '<-' are +> +> still invalid), and leaves us at the mercy of both the guesser and the +> +> lexer. +> +> +> +> I'm not actually sure what I dislike the least; I think I dislike #1 the +> +> most. #4 gets us most of what we want but is perhaps porcelain. +> +> +> +> I suspect if we attempt to move more of our documentation to ReST and +> +> Sphinx that we will need to answer for ourselves how we intend to +> +> document QMP code flow examples. +> +> +Writing a custom lexer that handles "<-" and "->" was simple (see below). +> +> +Now, is it possible to convince Sphinx to register and use a custom lexer? +> +Spoilers, yes, and I've sent a patch to list. Thanks for your help! 
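+
+[Illustration: wiring such a lexer into the docs build from conf.py,
+assuming the QMPExampleLexer above is saved as a qmp_lexer.py module next
+to conf.py. In the Sphinx versions discussed in this thread, add_lexer()
+takes a lexer instance; later Sphinx releases also accept the class.]
+
+# conf.py
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath('.'))
+
+def setup(app):
+    from qmp_lexer import QMPExampleLexer
+    app.add_lexer('QMP', QMPExampleLexer())
+
+# QMP examples can then be marked up as:
+#
+#   .. code-block:: QMP
+#
+#      -> { "execute": "drive-backup", "arguments": { ... } }
+#      <- { "return": {} }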
+
diff --git a/results/classifier/014/none/55753058 b/results/classifier/014/none/55753058
new file mode 100644
index 00000000..188149c3
--- /dev/null
+++ b/results/classifier/014/none/55753058
@@ -0,0 +1,320 @@
+risc-v: 0.789
+TCG: 0.743
+ppc: 0.731
+peripherals: 0.728
+hypervisor: 0.728
+x86: 0.713
+KVM: 0.713
+i386: 0.700
+operating system: 0.696
+vnc: 0.682
+VMM: 0.657
+mistranslation: 0.649
+user-level: 0.648
+graphic: 0.630
+device: 0.623
+register: 0.620
+debug: 0.611
+arm: 0.595
+performance: 0.591
+permissions: 0.580
+semantic: 0.577
+virtual: 0.539
+architecture: 0.534
+assembly: 0.529
+alpha: 0.529
+network: 0.525
+PID: 0.512
+kernel: 0.485
+boot: 0.478
+socket: 0.462
+files: 0.459
+
+[RESEND][BUG FIX HELP] QEMU main thread endlessly hangs in __ppoll()
+
+Hi Genius,
+I am a user of QEMU v4.2.0 and am stuck on an interesting bug, which may
+still exist in the mainline.
+Thanks in advance to heroes who can take a look and share their understanding.
+
+The qemu main thread endlessly hangs while handling the qmp statement:
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line':
+'drive_del replication0' } }
+and the call trace looks like:
+#0 0x00007f3c22045bf6 in __ppoll (fds=0x555611328410, nfds=1,
+timeout=<optimized out>, timeout@entry=0x7ffc56c66db0,
+sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:44
+#1 0x000055561021f415 in ppoll (__ss=0x0, __timeout=0x7ffc56c66db0,
+__nfds=<optimized out>, __fds=<optimized out>)
+at /usr/include/x86_64-linux-gnu/bits/poll2.h:77
+#2 qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>,
+timeout=<optimized out>) at util/qemu-timer.c:348
+#3 0x0000555610221430 in aio_poll (ctx=ctx@entry=0x5556113010f0,
+blocking=blocking@entry=true) at util/aio-posix.c:669
+#4 0x000055561019268d in bdrv_do_drained_begin (poll=true,
+ignore_bds_parents=false, parent=0x0, recursive=false,
+bs=0x55561138b0a0) at block/io.c:430
+#5 bdrv_do_drained_begin (bs=0x55561138b0a0, recursive=<optimized out>,
+parent=0x0, ignore_bds_parents=<optimized out>,
+poll=<optimized out>) at block/io.c:396
+#6 0x000055561017b60b in quorum_del_child (bs=0x55561138b0a0,
+child=0x7f36dc0ce380, errp=<optimized out>)
+at block/quorum.c:1063
+#7 0x000055560ff5836b in qmp_x_blockdev_change (parent=0x555612373120
+"colo-disk0", has_child=<optimized out>,
+child=0x5556112df3e0 "children.1", has_node=<optimized out>, node=0x0,
+errp=0x7ffc56c66f98) at blockdev.c:4494
+#8 0x00005556100f8f57 in qmp_marshal_x_blockdev_change (args=<optimized
+out>, ret=<optimized out>, errp=0x7ffc56c67018)
+at qapi/qapi-commands-block-core.c:1538
+#9 0x00005556101d8290 in do_qmp_dispatch (errp=0x7ffc56c67010,
+allow_oob=<optimized out>, request=<optimized out>,
+cmds=0x5556109c69a0 <qmp_commands>) at qapi/qmp-dispatch.c:132
+#10 qmp_dispatch (cmds=0x5556109c69a0 <qmp_commands>, request=<optimized
+out>, allow_oob=<optimized out>)
+at qapi/qmp-dispatch.c:175
+#11 0x00005556100d4c4d in monitor_qmp_dispatch (mon=0x5556113a6f40,
+req=<optimized out>) at monitor/qmp.c:145
+#12 0x00005556100d5437 in monitor_qmp_bh_dispatcher (data=<optimized out>)
+at monitor/qmp.c:234
+#13 0x000055561021dbec in aio_bh_call (bh=0x5556112164b0) at
+util/async.c:117
+#14 aio_bh_poll (ctx=ctx@entry=0x5556112151b0) at util/async.c:117
+#15 0x00005556102212c4 in aio_dispatch (ctx=0x5556112151b0) at
+util/aio-posix.c:459
+#16 0x000055561021dab2 in aio_ctx_dispatch (source=<optimized out>,
+callback=<optimized out>, user_data=<optimized out>)
+at util/async.c:260
+#17 0x00007f3c22302fbd in g_main_context_dispatch () from
+/lib/x86_64-linux-gnu/libglib-2.0.so.0
+#18 0x0000555610220358 in glib_pollfds_poll () at util/main-loop.c:219
+#19 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242
+#20 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518
+#21 0x000055560ff600fe in main_loop () at vl.c:1814
+#22 0x000055560fddbce9 in main (argc=<optimized out>, argv=<optimized out>,
+envp=<optimized out>) at vl.c:4503
+
+We found that we're doing an endless check at this line in
+block/io.c:bdrv_do_drained_begin():
+    BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
+and it turns out that bdrv_drain_poll() always returns true, because of:
+- bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)
+- AND atomic_read(&bs->in_flight)
+
+I personally think this is a deadlock issue in the QEMU block layer
+(as we know, we have some #FIXME comments in related code, such as the
+block permission update).
+Any comments are welcome and appreciated.
+
+---
+thx,likexu
+
+On 2/28/21 9:39 PM, Like Xu wrote:
+Hi Genius,
+I am a user of QEMU v4.2.0 and am stuck on an interesting bug, which may
+still exist in the mainline.
+Thanks in advance to heroes who can take a look and share their understanding.
+
+Do you have a test case that reproduces on 5.2? It'd be nice to know if
+it was still a problem in the latest source tree or not.
+
+--js
+
+The qemu main thread endlessly hangs while handling the qmp statement:
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line':
+'drive_del replication0' } }
+and the call trace looks like:
+[... the full backtrace and analysis quoted above ...]
+
+Hi John,
+
+Thanks for your comment.
+
+On 2021/3/5 7:53, John Snow wrote:
+On 2/28/21 9:39 PM, Like Xu wrote:
+Hi Genius,
+I am a user of QEMU v4.2.0 and am stuck on an interesting bug, which may
+still exist in the mainline.
+Thanks in advance to heroes who can take a look and share their understanding.
+Do you have a test case that reproduces on 5.2? It'd be nice to know if
+it was still a problem in the latest source tree or not.
+
+We narrowed down the source of the bug, which basically came from
+the following qmp usage:
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line':
+'drive_del replication0' } }
+One of the test cases is the COLO usage (docs/colo-proxy.txt).
+
+This issue is sporadic; the probability may be 1/15 for an I/O-heavy guest.
+
+I believe it's reproducible on 5.2 and the latest tree.
+--js +The qemu main thread endlessly hangs in the handle of the qmp statement: +{'execute': 'human-monitor-command', 'arguments':{ 'command-line': +'drive_del replication0' } } +and we have the call trace looks like: +#0 0x00007f3c22045bf6 in __ppoll (fds=0x555611328410, nfds=1, +timeout=<optimized out>, timeout@entry=0x7ffc56c66db0, +sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:44 +#1 0x000055561021f415 in ppoll (__ss=0x0, __timeout=0x7ffc56c66db0, +__nfds=<optimized out>, __fds=<optimized out>) +at /usr/include/x86_64-linux-gnu/bits/poll2.h:77 +#2 qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, +timeout=<optimized out>) at util/qemu-timer.c:348 +#3 0x0000555610221430 in aio_poll (ctx=ctx@entry=0x5556113010f0, +blocking=blocking@entry=true) at util/aio-posix.c:669 +#4 0x000055561019268d in bdrv_do_drained_begin (poll=true, +ignore_bds_parents=false, parent=0x0, recursive=false, +bs=0x55561138b0a0) at block/io.c:430 +#5 bdrv_do_drained_begin (bs=0x55561138b0a0, recursive=<optimized out>, +parent=0x0, ignore_bds_parents=<optimized out>, +poll=<optimized out>) at block/io.c:396 +#6 0x000055561017b60b in quorum_del_child (bs=0x55561138b0a0, +child=0x7f36dc0ce380, errp=<optimized out>) +at block/quorum.c:1063 +#7 0x000055560ff5836b in qmp_x_blockdev_change (parent=0x555612373120 +"colo-disk0", has_child=<optimized out>, +child=0x5556112df3e0 "children.1", has_node=<optimized out>, node=0x0, +errp=0x7ffc56c66f98) at blockdev.c:4494 +#8 0x00005556100f8f57 in qmp_marshal_x_blockdev_change (args=<optimized +out>, ret=<optimized out>, errp=0x7ffc56c67018) +at qapi/qapi-commands-block-core.c:1538 +#9 0x00005556101d8290 in do_qmp_dispatch (errp=0x7ffc56c67010, +allow_oob=<optimized out>, request=<optimized out>, +cmds=0x5556109c69a0 <qmp_commands>) at qapi/qmp-dispatch.c:132 +#10 qmp_dispatch (cmds=0x5556109c69a0 <qmp_commands>, request=<optimized +out>, allow_oob=<optimized out>) +at qapi/qmp-dispatch.c:175 +#11 0x00005556100d4c4d in monitor_qmp_dispatch (mon=0x5556113a6f40, +req=<optimized out>) at monitor/qmp.c:145 +#12 0x00005556100d5437 in monitor_qmp_bh_dispatcher (data=<optimized +out>) at monitor/qmp.c:234 +#13 0x000055561021dbec in aio_bh_call (bh=0x5556112164bGrateful0) at +util/async.c:117 +#14 aio_bh_poll (ctx=ctx@entry=0x5556112151b0) at util/async.c:117 +#15 0x00005556102212c4 in aio_dispatch (ctx=0x5556112151b0) at +util/aio-posix.c:459 +#16 0x000055561021dab2 in aio_ctx_dispatch (source=<optimized out>, +callback=<optimized out>, user_data=<optimized out>) +at util/async.c:260 +#17 0x00007f3c22302fbd in g_main_context_dispatch () from +/lib/x86_64-linux-gnu/libglib-2.0.so.0 +#18 0x0000555610220358 in glib_pollfds_poll () at util/main-loop.c:219 +#19 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242 +#20 main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518 +#21 0x000055560ff600fe in main_loop () at vl.c:1814 +#22 0x000055560fddbce9 in main (argc=<optimized out>, argv=<optimized +out>, envp=<optimized out>) at vl.c:4503 +We found that we're doing endless check in the line of +block/io.c:bdrv_do_drained_begin(): +     BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); +and it turns out that the bdrv_drain_poll() always get true from: +- bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents) +- AND atomic_read(&bs->in_flight) + +I personally think this is a deadlock issue in the a QEMU block layer +(as we know, we have some #FIXME comments in related codes, such as block +permisson update). 
+Any comments are welcome and appreciated. + +--- +thx,likexu + +On 3/4/21 10:08 PM, Like Xu wrote: +Hi John, + +Thanks for your comment. + +On 2021/3/5 7:53, John Snow wrote: +On 2/28/21 9:39 PM, Like Xu wrote: +Hi Genius, +I am a user of QEMU v4.2.0 and stuck in an interesting bug, which may +still exist in the mainline. +Thanks in advance to heroes who can take a look and share understanding. +Do you have a test case that reproduces on 5.2? It'd be nice to know +if it was still a problem in the latest source tree or not. +We narrowed down the source of the bug, which basically came from +the following qmp usage: +{'execute': 'human-monitor-command', 'arguments':{ 'command-line': +'drive_del replication0' } } +One of the test cases is the COLO usage (docs/colo-proxy.txt). + +This issue is sporadic,the probability may be 1/15 for a io-heavy guest. + +I believe it's reproducible on 5.2 and the latest tree. +Can you please test and confirm that this is the case, and then file a +bug report on the LP: +https://launchpad.net/qemu +and include: +- The exact commit you used (current origin/master debug build would be +the most ideal.) +- Which QEMU binary you are using (qemu-system-x86_64?) +- The shortest command line you are aware of that reproduces the problem +- The host OS and kernel version +- An updated call trace +- Any relevant commands issued prior to the one that caused the hang; or +detailed reproduction steps if possible. +Thanks, +--js + diff --git a/results/classifier/014/none/56309929 b/results/classifier/014/none/56309929 new file mode 100644 index 00000000..ef354b0e --- /dev/null +++ b/results/classifier/014/none/56309929 @@ -0,0 +1,207 @@ +user-level: 0.684 +register: 0.658 +device: 0.646 +VMM: 0.643 +TCG: 0.637 +KVM: 0.636 +virtual: 0.623 +assembly: 0.618 +performance: 0.608 +ppc: 0.606 +vnc: 0.600 +network: 0.589 +permissions: 0.587 +debug: 0.585 +arm: 0.580 +architecture: 0.579 +boot: 0.578 +risc-v: 0.574 +graphic: 0.570 +PID: 0.561 +mistranslation: 0.554 +operating system: 0.543 +hypervisor: 0.521 +semantic: 0.521 +socket: 0.516 +kernel: 0.487 +alpha: 0.480 +x86: 0.465 +peripherals: 0.387 +files: 0.311 +i386: 0.269 + +[Qemu-devel] [BUG 2.6] Broken CONFIG_TPM? + +A compilation test with clang -Weverything reported this problem: + +config-host.h:112:20: warning: '$' in identifier +[-Wdollar-in-identifier-extension] + +The line of code looks like this: + +#define CONFIG_TPM $(CONFIG_SOFTMMU) + +This is fine for Makefile code, but won't work as expected in C code. + +Am 28.04.2016 um 22:33 schrieb Stefan Weil: +> +A compilation test with clang -Weverything reported this problem: +> +> +config-host.h:112:20: warning: '$' in identifier +> +[-Wdollar-in-identifier-extension] +> +> +The line of code looks like this: +> +> +#define CONFIG_TPM $(CONFIG_SOFTMMU) +> +> +This is fine for Makefile code, but won't work as expected in C code. +> +A complete 64 bit build with clang -Weverything creates a log file of +1.7 GB. 
+Here are the uniq warnings sorted by their frequency: + + 1 -Wflexible-array-extensions + 1 -Wgnu-folding-constant + 1 -Wunknown-pragmas + 1 -Wunknown-warning-option + 1 -Wunreachable-code-loop-increment + 2 -Warray-bounds-pointer-arithmetic + 2 -Wdollar-in-identifier-extension + 3 -Woverlength-strings + 3 -Wweak-vtables + 4 -Wgnu-empty-struct + 4 -Wstring-conversion + 6 -Wclass-varargs + 7 -Wc99-extensions + 7 -Wc++-compat + 8 -Wfloat-equal + 11 -Wformat-nonliteral + 16 -Wshift-negative-value + 19 -Wglobal-constructors + 28 -Wc++11-long-long + 29 -Wembedded-directive + 38 -Wvla + 40 -Wcovered-switch-default + 40 -Wmissing-variable-declarations + 49 -Wold-style-cast + 53 -Wgnu-conditional-omitted-operand + 56 -Wformat-pedantic + 61 -Wvariadic-macros + 77 -Wc++11-extensions + 83 -Wgnu-flexible-array-initializer + 83 -Wzero-length-array + 96 -Wgnu-designator + 102 -Wmissing-noreturn + 103 -Wconditional-uninitialized + 107 -Wdisabled-macro-expansion + 115 -Wunreachable-code-return + 134 -Wunreachable-code + 243 -Wunreachable-code-break + 257 -Wfloat-conversion + 280 -Wswitch-enum + 291 -Wpointer-arith + 298 -Wshadow + 378 -Wassign-enum + 395 -Wused-but-marked-unused + 420 -Wreserved-id-macro + 493 -Wdocumentation + 510 -Wshift-sign-overflow + 565 -Wgnu-case-range + 566 -Wgnu-zero-variadic-macro-arguments + 650 -Wbad-function-cast + 705 -Wmissing-field-initializers + 817 -Wgnu-statement-expression + 968 -Wdocumentation-unknown-command + 1021 -Wextra-semi + 1112 -Wgnu-empty-initializer + 1138 -Wcast-qual + 1509 -Wcast-align + 1766 -Wextended-offsetof + 1937 -Wsign-compare + 2130 -Wpacked + 2404 -Wunused-macros + 3081 -Wpadded + 4182 -Wconversion + 5430 -Wlanguage-extension-token + 6655 -Wshorten-64-to-32 + 6995 -Wpedantic + 7354 -Wunused-parameter + 27659 -Wsign-conversion + +Stefan Weil <address@hidden> writes: + +> +A compilation test with clang -Weverything reported this problem: +> +> +config-host.h:112:20: warning: '$' in identifier +> +[-Wdollar-in-identifier-extension] +> +> +The line of code looks like this: +> +> +#define CONFIG_TPM $(CONFIG_SOFTMMU) +> +> +This is fine for Makefile code, but won't work as expected in C code. +Broken in commit 3b8acc1 "configure: fix TPM logic". Cc'ing Paolo. + +Impact: #ifdef CONFIG_TPM never disables code. There are no other uses +of CONFIG_TPM in C code. + +I had a quick peek at configure and create_config, but refrained from +attempting to fix this, since I don't understand when exactly CONFIG_TPM +should be defined. + +On 29 April 2016 at 08:42, Markus Armbruster <address@hidden> wrote: +> +Stefan Weil <address@hidden> writes: +> +> +> A compilation test with clang -Weverything reported this problem: +> +> +> +> config-host.h:112:20: warning: '$' in identifier +> +> [-Wdollar-in-identifier-extension] +> +> +> +> The line of code looks like this: +> +> +> +> #define CONFIG_TPM $(CONFIG_SOFTMMU) +> +> +> +> This is fine for Makefile code, but won't work as expected in C code. +> +> +Broken in commit 3b8acc1 "configure: fix TPM logic". Cc'ing Paolo. +> +> +Impact: #ifdef CONFIG_TPM never disables code. There are no other uses +> +of CONFIG_TPM in C code. +> +> +I had a quick peek at configure and create_config, but refrained from +> +attempting to fix this, since I don't understand when exactly CONFIG_TPM +> +should be defined. +Looking at 'git blame' suggests this has been wrong like this for +some years, so we don't need to scramble to fix it for 2.6. 
+ +thanks +-- PMM + diff --git a/results/classifier/014/none/65781993 b/results/classifier/014/none/65781993 new file mode 100644 index 00000000..2f2f3a17 --- /dev/null +++ b/results/classifier/014/none/65781993 @@ -0,0 +1,2820 @@ +risc-v: 0.745 +user-level: 0.697 +PID: 0.673 +debug: 0.673 +arm: 0.672 +virtual: 0.670 +assembly: 0.666 +semantic: 0.665 +graphic: 0.664 +alpha: 0.662 +socket: 0.660 +operating system: 0.660 +register: 0.659 +permissions: 0.658 +architecture: 0.658 +network: 0.657 +files: 0.657 +kernel: 0.656 +mistranslation: 0.650 +device: 0.647 +performance: 0.636 +boot: 0.635 +KVM: 0.627 +peripherals: 0.624 +i386: 0.612 +VMM: 0.612 +TCG: 0.607 +vnc: 0.590 +hypervisor: 0.586 +x86: 0.579 +ppc: 0.557 + +[Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang + +Thank youã + +I have test areadyã + +When the Primary Node panic,the Secondary Node qemu hang at the same placeã + +Incorrding +http://wiki.qemu-project.org/Features/COLO +ï¼kill Primary Node qemu +will not produce the problem,but Primary Node panic canã + +I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. + + +when failover,channel_shutdown could not shut down the channel. + + +so the colo_process_incoming_thread will hang at recvmsg. + + +I test a patch: + + +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +My test will not hang any more. + + + + + + + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ç广10165992 address@hidden +æéäººï¼ address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang + + + + + +Hi,Wang. + +You can test this branch: +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +and please follow wiki ensure your own configuration correctly. +http://wiki.qemu-project.org/Features/COLO +Thanks + +Zhang Chen + + +On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ +ï¼ hi. +ï¼ +ï¼ I test the git qemu master have the same problem. 
+>
+> (gdb) bt
+> #0  qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880,
+>     niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461
+> #1  0x00007f658e4aa0c2 in qio_channel_read
+>     (address@hidden, address@hidden "",
+>     address@hidden, address@hidden) at io/channel.c:114
+> #2  0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>,
+>     buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at
+>     migration/qemu-file-channel.c:78
+> #3  0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at
+>     migration/qemu-file.c:295
+> #4  0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden,
+>     address@hidden) at migration/qemu-file.c:555
+> #5  0x00007f658e3ea34b in qemu_get_byte (address@hidden) at
+>     migration/qemu-file.c:568
+> #6  0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at
+>     migration/qemu-file.c:648
+> #7  0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800,
+>     address@hidden) at migration/colo.c:244
+> #8  0x00007f658e3e681e in colo_receive_check_message (f=<optimized out>,
+>     address@hidden, address@hidden)
+>     at migration/colo.c:264
+> #9  0x00007f658e3e740e in colo_process_incoming_thread
+>     (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577
+> #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0
+> #11 0x00007f65881983ed in clone () from /lib64/libc.so.6
+>
+> (gdb) p ioc->name
+> $2 = 0x7f658ff7d5c0 "migration-socket-incoming"
+> (gdb) p ioc->features    <-- does not support QIO_CHANNEL_FEATURE_SHUTDOWN
+> $3 = 0
+>
+> (gdb) bt
+> #0  socket_accept_incoming_migration (ioc=0x7fdcceeafa90,
+>     condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137
+> #1  0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at
+>     gmain.c:3054
+> #2  g_main_context_dispatch (context=<optimized out>,
+>     address@hidden) at gmain.c:3630
+> #3  0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213
+> #4  os_host_main_loop_wait (timeout=<optimized out>) at
+>     util/main-loop.c:258
+> #5  main_loop_wait (address@hidden) at
+>     util/main-loop.c:506
+> #6  0x00007fdccb526187 in main_loop () at vl.c:1898
+> #7  main (argc=<optimized out>, argv=<optimized out>,
+>     envp=<optimized out>) at vl.c:4709
+>
+> (gdb) p ioc->features
+> $1 = 6
+> (gdb) p ioc->name
+> $2 = 0x7fdcce1b1ab0 "migration-socket-listener"
+>
+> May be socket_accept_incoming_migration should
+> call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)??
+>
+> thank you.
+>
+> Original Mail
+> From: address@hidden
+> To: address@hidden
+> Cc: address@hidden
+> Date: 2017-03-16 14:46
+> Subject: Re: [Qemu-devel] COLO failover hang
+>
+> On 03/15/2017 05:06 PM, wangguang wrote:
+>> I am testing the QEMU COLO feature described here [QEMU
+>> Wiki](http://wiki.qemu-project.org/Features/COLO).
+>>
+>> When the Primary Node panic,the Secondary Node qemu hang.
+>> hang at recvmsg in qio_channel_socket_readv.
+>> And I run { 'execute': 'nbd-server-stop' } and { "execute":
+>> "x-colo-lost-heartbeat" } in Secondary VM's
+>> monitor,the Secondary Node qemu still hang at recvmsg .
+>>
+>> I found that the colo in qemu is not complete yet.
+>> Do the colo have any plan for development?
+>
+> Yes, We are developing. You can see some of patch we pushing.
+>
+>> Has anyone ever run it successfully? Any help is appreciated!
+>
+> In our internal version can run it successfully,
+> The failover detail you can ask Zhanghailiang for help.
+> Next time if you have some question about COLO,
+> please cc me and zhanghailiang address@hidden
+>
+> Thanks
+> Zhang Chen
+>
+>> centos7.2+qemu2.7.50
+>> (gdb) bt
+>> #0  0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0
+>> #1  0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>,
+>>     iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0,
+>>     errp=0x0) at io/channel-socket.c:497
+>> #2  0x00007f3e03329472 in qio_channel_read (address@hidden,
+>>     address@hidden "", address@hidden,
+>>     address@hidden) at io/channel.c:97
+>> #3  0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>,
+>>     buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at
+>>     migration/qemu-file-channel.c:78
+>> #4  0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at
+>>     migration/qemu-file.c:257
+>> #5  0x00007f3e03274a41 in qemu_peek_byte (address@hidden,
+>>     address@hidden) at migration/qemu-file.c:510
+>> #6  0x00007f3e03274aab in qemu_get_byte (address@hidden) at
+>>     migration/qemu-file.c:523
+>> #7  0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at
+>>     migration/qemu-file.c:603
+>> #8  0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00,
+>>     address@hidden) at migration/colo.c:215
+>> #9  0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48,
+>>     checkpoint_request=<synthetic pointer>, f=<optimized out>) at
+>>     migration/colo.c:546
+>> #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at
+>>     migration/colo.c:649
+>> #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0
+>> #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6
+>>
+>> --
+>> View this message in context:
+>> http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html
+>> Sent from the Developer mailing list archive at Nabble.com.
+>
+> --
+> Thanks
+> Zhang Chen
+
+Hi,
+
+On 2017/3/21 16:10, address@hidden wrote:
+> I think this is because the channel does not support
+> QIO_CHANNEL_FEATURE_SHUTDOWN.
+
+Yes, you are right: when we do failover for the primary/secondary VM, we
+shut down the related fd, in case something is stuck in read/write on
+that fd.
+
+It seems that you didn't follow the above introduction exactly to do the
+test.  Could you share your test procedures?  Especially the commands used
+in the test.
+
+Thanks,
+Hailiang
+
+[remainder of quoted text trimmed; it repeats the report and patch above]
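+
+[Editorial illustration -- a minimal, self-contained sketch (not from the
+thread) of the mechanism the proposed patch relies on: shutdown(2) on a
+socket makes a recv()/recvmsg() blocked in another thread return
+immediately, which is what lets failover unblock the stuck COLO incoming
+thread once the channel advertises QIO_CHANNEL_FEATURE_SHUTDOWN.]
+
+#include <pthread.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static int fds[2];
+
+static void *reader(void *arg)
+{
+    char buf[16];
+    /* Blocks here, just as the COLO incoming thread blocks in recvmsg() */
+    ssize_t n = recv(fds[0], buf, sizeof(buf), 0);
+    printf("recv returned %zd\n", n);  /* 0 once the endpoint is shut down */
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t t;
+    socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
+    pthread_create(&t, NULL, reader, NULL);
+    sleep(1);                     /* let the reader block */
+    shutdown(fds[0], SHUT_RDWR);  /* roughly what qemu_file_shutdown() boils
+                                     down to when the channel supports it */
+    pthread_join(t, NULL);
+    return 0;
+}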
+Hi,
+
+Thanks for reporting this; I confirmed it in my test, and it is a bug.
+
+We tried to call qemu_file_shutdown() to shut down the related fd, in case
+the COLO thread/incoming thread is stuck in read/write() while doing
+failover, but it didn't take effect, because all the fds used by COLO (also
+migration) have been wrapped by qio channel, and the shutdown API will not
+be called if we didn't qio_channel_set_feature(QIO_CHANNEL(sioc),
+QIO_CHANNEL_FEATURE_SHUTDOWN).
+
+Cc: Dr. David Alan Gilbert <address@hidden>
+
+I suspect migration cancel has the same problem: it may get stuck in
+write() if we try to cancel migration.
+
+void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp)
+{
+    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing");
+    migration_channel_connect(s, ioc, NULL);
+    ... ...
+
+We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc),
+QIO_CHANNEL_FEATURE_SHUTDOWN) above, and then:
+
+migrate_fd_cancel()
+{
+    ... ...
+    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
+        qemu_file_shutdown(f);  --> This will not take effect. No?
+    }
+}
+
+Thanks,
+Hailiang
+
+[quoted text trimmed; it repeats the report and patch above]
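+
+[Editorial illustration -- a toy, self-contained model (illustrative names,
+not QEMU's actual types) of the gating Hailiang describes: the shutdown
+request is a no-op unless the channel's feature bit is set.  The bit value
+is an assumption inferred from the gdb dumps above, where the listener has
+features = 6 and the accepted incoming channel has features = 0.]
+
+#include <stdio.h>
+
+#define FEATURE_SHUTDOWN (1 << 1)   /* assumed bit, consistent with '6' */
+
+struct channel {
+    unsigned features;
+};
+
+static int channel_shutdown(struct channel *c)
+{
+    if (!(c->features & FEATURE_SHUTDOWN)) {
+        return -1;  /* nothing happens: a blocked recvmsg() stays blocked */
+    }
+    /* ... here the real code would call shutdown(2) on the socket ... */
+    return 0;
+}
+
+int main(void)
+{
+    struct channel incoming = { .features = 0 };  /* gdb: $3 = 0 */
+    struct channel listener = { .features = 6 };  /* gdb: $1 = 6 */
+
+    printf("incoming: %d\n", channel_shutdown(&incoming));  /* -1: stuck */
+    printf("listener: %d\n", channel_shutdown(&listener));  /*  0: works */
+    return 0;
+}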
+* Hailiang Zhang (address@hidden) wrote:
+> Thanks for reporting this; I confirmed it in my test, and it is a bug.
+>
+> [...]
+>
+> I suspect migration cancel has the same problem: it may get stuck in
+> write() if we try to cancel migration.
+>
+> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc),
+> QIO_CHANNEL_FEATURE_SHUTDOWN) above, and then:
+>
+> migrate_fd_cancel()
+> {
+>     ... ...
+>     if (s->state == MIGRATION_STATUS_CANCELLING && f) {
+>         qemu_file_shutdown(f);  --> This will not take effect. No?
+>     }
+> }
+
+(cc'd in Daniel Berrange).
+I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
+at the top of qio_channel_socket_new; so I think that's safe, isn't it?
+
+Dave
+
+[quoted text trimmed; it repeats the report and patch above]
+--
+Dr. David Alan Gilbert / address@hidden / Manchester, UK
+
+On 2017/3/21 19:56, Dr. David Alan Gilbert wrote:
+> (cc'd in Daniel Berrange).
+> I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
+> at the top of qio_channel_socket_new; so I think that's safe, isn't it?
+
+Hmm, you are right: this problem only exists for the migration incoming fd,
+thanks.
+
+> Dave
+
+[quoted text trimmed; it repeats the report and patch above]
+.
+
+* Hailiang Zhang (address@hidden) wrote:
+> On 2017/3/21 19:56, Dr. David Alan Gilbert wrote:
+> > I see that we call qio_channel_set_feature(ioc,
+> > QIO_CHANNEL_FEATURE_SHUTDOWN); at the top of qio_channel_socket_new;
+> > so I think that's safe, isn't it?
+> Hmm, you are right: this problem only exists for the migration incoming fd,
+> thanks.
+
+Yes, and I don't think we normally do a cancel on the incoming side of a
+migration.
+
+Dave
+
+[quoted text trimmed; it repeats the report and patch above]
+--
+Dr. David Alan Gilbert / address@hidden / Manchester, UK
+
diff --git a/results/classifier/014/none/68897003 b/results/classifier/014/none/68897003
new file mode 100644
index 00000000..763ad296
--- /dev/null
+++ b/results/classifier/014/none/68897003
@@ -0,0 +1,743 @@
+register: 0.709
+assembly: 0.697
+graphic: 0.694
+user-level: 0.679
+permissions: 0.677
+PID: 0.677
+virtual: 0.675
+performance: 0.673
+semantic: 0.671
+debug: 0.663
+arm: 0.658
+device: 0.647
+architecture: 0.640
+network: 0.614
+files: 0.608
+KVM: 0.598
+socket: 0.585
+VMM: 0.582
+ppc: 0.574
+kernel: 0.570
+boot: 0.569
+operating system: 0.565
+TCG: 0.565
+alpha: 0.562
+hypervisor: 0.559
+x86: 0.542
+mistranslation: 0.535
+vnc: 0.525
+peripherals: 0.501
+risc-v: 0.464
+i386: 0.449
+
+[Qemu-devel] [BUG] VM abort after migration
+
+Hi guys,
+
+We found a qemu core in our testing environment: the assertion
+'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered, and
+bus->irq_count[i] was '-1'.
+
+Through analysis, it happened after VM migration and we think
+it was caused by the following sequence:
+
+*Migration Source*
+1. save bus pci.0 state, including irq_count[x] ( =0 , old )
+2. save E1000:
+     e1000_pre_save
+      e1000_mit_timer
+       set_interrupt_cause
+        pci_set_irq --> update pci_dev->irq_state to 1 and
+                        update bus->irq_count[x] to 1 ( new )
+     the irq_state is sent to the dest.
+
+*Migration Dest*
+1. Receive the irq_count[x] of pci.0 as 0, but the irq_state of e1000 as 1.
+2. If the e1000 needs to change its irq line, it calls pci_irq_handler();
+   the irq_state may change to 0 and bus->irq_count[x] will become
+   -1 in this situation.
+3. On VM reboot the assertion will then be triggered.
+
+We also found some people who faced a similar problem:
+[1] https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html
+[2] https://bugs.launchpad.net/qemu/+bug/1702621
+
+Are there any patches to fix this problem?
+Can we save the pcibus state after all the pci devs are saved?
+
+Thanks,
+Longpeng(Mike)
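+
+[Editorial illustration -- a toy, self-contained model (not QEMU's actual
+structures) of the accounting drift described above: the per-bus counter
+and the per-device level are saved at different moments, so the destination
+loads an inconsistent pair and the next falling edge drives the counter
+to -1.]
+
+#include <assert.h>
+#include <stdio.h>
+
+static int dev_irq_state;   /* per-device level, migrated with e1000      */
+static int bus_irq_count;   /* per-bus count, migrated with the pci.0 bus */
+
+static void set_irq(int level)
+{
+    int change = level - dev_irq_state;  /* mirrors pci_irq_handler() logic */
+    if (!change) {
+        return;
+    }
+    dev_irq_state = level;
+    bus_irq_count += change;
+}
+
+int main(void)
+{
+    /* Destination after the sequence in the report: the bus was saved
+     * before e1000_pre_save raised the line, the device after. */
+    bus_irq_count = 0;   /* loaded from the early pci.0 snapshot */
+    dev_irq_state = 1;   /* loaded from the later e1000 snapshot */
+
+    set_irq(0);          /* the guest/device lowers the line */
+    printf("bus_irq_count = %d\n", bus_irq_count);  /* prints -1 */
+
+    assert(bus_irq_count == 0);  /* pcibus_reset()'s assertion: aborts */
+    return 0;
+}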
+> [... bug description quoted above trimmed ...]
+> Are there any patches to fix this problem?
+
+I don't remember any.
+
+> Can we save the pcibus state after all the PCI devices are saved?
+
+Does this problem only happen with e1000? I think so.
+If it's only e1000 I think we should fix it - I think once the VM is
+stopped for doing the device migration it shouldn't be raising
+interrupts.
+
+Dave
+
+> Thanks,
+> Longpeng(Mike)
+--
+Dr. David Alan Gilbert / address@hidden / Manchester, UK
+
+On 2019/7/8 5:47 PM, Dr. David Alan Gilbert wrote:
+> * longpeng (address@hidden) wrote:
+> [... quote trimmed ...]
+> Does this problem only happen with e1000? I think so.
+> If it's only e1000 I think we should fix it - I think once the VM is
+> stopped for doing the device migration it shouldn't be raising
+> interrupts.
+
+I wonder whether we can simply fix this by not setting ICS in pre_save()
+but scheduling the mit timer unconditionally in post_load().
+
+Thanks
+On 2019/7/10 11:25, Jason Wang wrote:
+> On 2019/7/8 5:47 PM, Dr. David Alan Gilbert wrote:
+> [... quote trimmed ...]
+> I wonder whether we can simply fix this by not setting ICS in pre_save()
+> but scheduling the mit timer unconditionally in post_load().
+
+I also think this is a bug of e1000, because we have found more core dumps
+with the same frame these days.
+
+I'm not familiar with e1000, so I hope someone could fix it, thanks. :)
+
+--
+Regards,
+Longpeng(Mike)
+
+On 2019/7/10 11:36, Longpeng (Mike) wrote:
+> [... quote trimmed ...]
+> I also think this is a bug of e1000, because we have found more core dumps
+> with the same frame these days.
+>
+> I'm not familiar with e1000, so I hope someone could fix it, thanks. :)
+
+Draft a patch in attachment, please test.
+
+Thanks
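A minimal sketch of the approach described above: don't raise the interrupt
from pre_save(), and re-arm the mitigation timer in post_load() instead. The
chkflag(MIT)/mit_timer names follow hw/net/e1000.c of that era, but the
re-arm delay is an arbitrary placeholder; this illustrates the idea and is
not the attached patch:

    /* Sketch for hw/net/e1000.c -- illustration of the idea, not the patch. */

    static int e1000_pre_save_sketch(void *opaque)
    {
        /* Don't emulate a mitigation-timer timeout here: doing so calls
         * set_interrupt_cause() -> pci_set_irq(), which bumps
         * pci_dev->irq_state and bus->irq_count[] *after* the PCI bus
         * state was already saved, so the destination receives
         * irq_count == 0 together with irq_state == 1. */
        return 0;
    }

    static int e1000_post_load_sketch(void *opaque, int version_id)
    {
        E1000State *s = opaque;

        /* Re-arm the mitigation timer on the destination instead; any
         * pending interrupt cause is then raised through the normal
         * path, keeping irq_state and irq_count consistent. */
        if (chkflag(MIT)) {
            s->mit_timer_on = 1;
            timer_mod(s->mit_timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)
                      + 1000 /* placeholder delay in ns */);
        }
        return 0;
    }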
+0001-e1000-don-t-raise-interrupt-in-pre_save.patch
+Description: Text Data
+
+On 2019/7/10 11:57, Jason Wang wrote:
+> [... quote trimmed ...]
+> Draft a patch in attachment, please test.
+
+Thanks. We'll test it for a few weeks and then give you the feedback. :)
+
+> Thanks
+
+--
+Regards,
+Longpeng(Mike)
+
+On 2019/7/10 11:57, Jason Wang wrote:
+> [... quote trimmed ...]
+> Draft a patch in attachment, please test.
+
+Hi Jason,
+
+We've tested the patch for about two weeks; everything went well, thanks!
+
+Feel free to add my:
+Reported-and-tested-by: Longpeng <address@hidden>
+
+--
+Regards,
+Longpeng(Mike)
+
+On 2019/7/27 2:10 PM, Longpeng (Mike) wrote:
+> [... quote trimmed ...]
+> We've tested the patch for about two weeks; everything went well, thanks!
+>
+> Feel free to add my:
+> Reported-and-tested-by: Longpeng <address@hidden>
+
+Applied.
+
+Thanks
+
diff --git a/results/classifier/014/none/70868267 b/results/classifier/014/none/70868267
new file mode 100644
index 00000000..49d61ed1
--- /dev/null
+++ b/results/classifier/014/none/70868267
@@ -0,0 +1,67 @@
+operating system: 0.773
+graphic: 0.706
+device: 0.643
+semantic: 0.635
+files: 0.552
+mistranslation: 0.537
+register: 0.530
+performance: 0.525
+debug: 0.521
+architecture: 0.433
+PID: 0.420
+socket: 0.418
+hypervisor: 0.416
+network: 0.411
+user-level: 0.400
+x86: 0.348
+kernel: 0.289
+peripherals: 0.274
+permissions: 0.265
+i386: 0.249
+risc-v: 0.243
+assembly: 0.240
+vnc: 0.227
+alpha: 0.205
+virtual: 0.205
+boot: 0.197
+arm: 0.189
+VMM: 0.187
+ppc: 0.180
+KVM: 0.167
+TCG: 0.159
+
+[Qemu-devel] [BUG] Failed to compile using gcc7.1
+
+Hi all,
+
+After upgrading gcc from 6.3.1 to 7.1.1, qemu can't be compiled with gcc.
+
+The error is:
+
+------
+  CC      block/blkdebug.o
+block/blkdebug.c: In function 'blkdebug_refresh_filename':
+block/blkdebug.c:693:31: error: '%s' directive output may be truncated
+writing up to 4095 bytes into a region of size 4086
+[-Werror=format-truncation=]
+"blkdebug:%s:%s", s->config_file ?: "",
+                               ^~
+In file included from /usr/include/stdio.h:939:0,
+                 from /home/adam/qemu/include/qemu/osdep.h:68,
+                 from block/blkdebug.c:25:
+/usr/include/bits/stdio2.h:64:10: note: '__builtin___snprintf_chk'
+output 11 or more bytes (assuming 4106) into a destination of size 4096
+return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
+          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        __bos (__s), __fmt, __va_arg_pack ());
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+cc1: all warnings being treated as errors
+make: *** [/home/adam/qemu/rules.mak:69: block/blkdebug.o] Error 1
+------
+
+It seems that gcc 7 introduces stricter checks for printf-style calls.
+When using clang, although there are some extra warnings, the compile
+at least passes.
+
+Thanks,
+Qu
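For reference, the diagnostic is easy to reproduce and address outside QEMU.
A standalone sketch that mirrors the shape of the blkdebug call (not the
actual QEMU fix); handling the snprintf return value deals with the
truncation case explicitly and, with gcc 7, typically quiets
-Wformat-truncation as well:

    #include <stddef.h>
    #include <stdio.h>

    enum { BUF_LEN = 4096 };

    /* Builds "blkdebug:<config>:<image>", the same shape as the call gcc
     * warns about: two inputs that may each be up to BUF_LEN bytes going
     * into one BUF_LEN-byte output buffer. */
    static void build_filename(char *out, size_t out_len,
                               const char *config, const char *image)
    {
        int ret = snprintf(out, out_len, "blkdebug:%s:%s",
                           config ? config : "", image);
        if (ret < 0 || (size_t)ret >= out_len) {
            /* Truncated or failed; real code would report an error. */
            out[0] = '\0';
        }
    }

    int main(void)
    {
        char buf[BUF_LEN];
        build_filename(buf, sizeof(buf), "cfg.conf", "disk.qcow2");
        puts(buf);
        return 0;
    }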
diff --git a/results/classifier/014/none/71456293 b/results/classifier/014/none/71456293
new file mode 100644
index 00000000..652b2c47
--- /dev/null
+++ b/results/classifier/014/none/71456293
@@ -0,0 +1,1513 @@
+user-level: 0.699
+KVM: 0.691
+operating system: 0.670
+mistranslation: 0.659
+hypervisor: 0.656
+peripherals: 0.646
+TCG: 0.642
+ppc: 0.642
+x86: 0.637
+i386: 0.633
+virtual: 0.629
+vnc: 0.625
+risc-v: 0.621
+VMM: 0.621
+debug: 0.620
+kernel: 0.620
+PID: 0.614
+permissions: 0.613
+register: 0.609
+graphic: 0.603
+assembly: 0.602
+device: 0.601
+semantic: 0.600
+alpha: 0.598
+arm: 0.598
+boot: 0.598
+socket: 0.596
+architecture: 0.594
+performance: 0.594
+files: 0.592
+network: 0.491
+
+[Qemu-devel][bug] qemu crash when migrate vm and vm's disks
+
+When migrating a VM and the VM's disks, the target host qemu crashes due
+to an invalid free:
+
+#0 object_unref (obj=0x1000) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920
+#1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>)
+   at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730
+#2 flatview_destroy (view=0x560439653880) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292
+#3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>)
+   at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284
+#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0
+#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6
+
+Tested on base qemu-2.12.0, but the latest qemu (v6.0.0-rc2) also
+reproduces it.
+
+The following patch can resolve this problem:
+https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+
+Steps to reproduce:
+(1) Create VM (virsh define)
+(2) Add 64 virtio scsi disks
+(3) migrate vm and vm's disks
+
+-------------------------------------------------------------------------------------------------------------------------------------
+This e-mail and its attachments contain confidential information from New H3C, which is
+intended only for the person or entity whose address is listed above. Any use of the
+information contained herein in any way (including, but not limited to, total or partial
+disclosure, reproduction, or dissemination) by persons other than the intended
+recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender
+by phone or email immediately and delete it!
+
+* Yuchen (yu.chen@h3c.com) wrote:
+> When migrating a VM and the VM's disks, the target host qemu crashes due
+> to an invalid free.
+> [... backtrace quoted above trimmed ...]
+> Tested on base qemu-2.12.0, but the latest qemu (v6.0.0-rc2) also
+> reproduces it.
+
+Interesting.
+
+> The following patch can resolve this problem:
+> https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html
+
+That's a pci/rcu change; ccing Paolo and Michael.
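For context on the backtrace: flatview_destroy() is the RCU reclamation
callback of the memory API, and it drops the reference each FlatView range
holds on its MemoryRegion. Paraphrased from QEMU's memory.c (details vary by
version); if device teardown frees a region's owner before this deferred
call runs, the object_unref() here touches freed memory, matching the
obj=0x1000 frame above:

    /* Paraphrased from qemu's memory.c; illustration, not an exact copy. */
    static void flatview_destroy(FlatView *view)
    {
        int i;

        if (view->dispatch) {
            address_space_dispatch_free(view->dispatch);
        }
        for (i = 0; i < view->nr; i++) {
            /* Unrefs the region's owner object.  This runs on the RCU
             * thread (call_rcu_thread in the backtrace), i.e. after a
             * grace period, so the owner must still be alive then. */
            memory_region_unref(view->ranges[i].mr);
        }
        g_free(view->ranges);
        g_free(view);
    }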
+> Steps to reproduce:
+> (1) Create VM (virsh define)
+> (2) Add 64 virtio scsi disks
+
+Is that hot adding the disks later, or are they included in the VM at
+creation?
+Can you provide a libvirt XML example?
+
+> (3) migrate vm and vm's disks
+
+What do you mean by 'and vm disks' - are you doing a block migration?
+
+Dave
+
+> [New H3C confidentiality notice trimmed]
+
+--
+Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
+
+> -----Original Message-----
+> From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com]
+> Sent: 2021-04-08 19:27
+> To: yuchen (Cloud) <yu.chen@h3c.com>; pbonzini@redhat.com; mst@redhat.com
+> Cc: qemu-devel@nongnu.org
+> Subject: Re: [Qemu-devel][bug] qemu crash when migrate vm and vm's disks
+>
+> [... quote trimmed ...]
+> Is that hot adding the disks later, or are they included in the VM at
+> creation?
+> Can you provide a libvirt XML example?
+> +Include disks in the VM at creation + +vm disks xml (only virtio scsi disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='sda' bus='scsi'/> + <address type='drive' controller='2' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='sdb' bus='scsi'/> + <address type='drive' controller='3' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='sdc' bus='scsi'/> + <address type='drive' controller='4' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='sdd' bus='scsi'/> + <address type='drive' controller='5' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='sde' bus='scsi'/> + <address type='drive' controller='6' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='sdf' bus='scsi'/> + <address type='drive' controller='7' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='sdg' bus='scsi'/> + <address type='drive' controller='8' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='sdh' bus='scsi'/> + <address type='drive' controller='9' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='sdi' bus='scsi'/> + <address type='drive' controller='10' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='sdj' bus='scsi'/> + <address type='drive' controller='11' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='sdk' bus='scsi'/> + <address type='drive' controller='12' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='sdl' bus='scsi'/> + <address 
type='drive' controller='13' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='sdm' bus='scsi'/> + <address type='drive' controller='14' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='sdn' bus='scsi'/> + <address type='drive' controller='15' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='sdo' bus='scsi'/> + <address type='drive' controller='16' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='sdp' bus='scsi'/> + <address type='drive' controller='17' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='sdq' bus='scsi'/> + <address type='drive' controller='18' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='sdr' bus='scsi'/> + <address type='drive' controller='19' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='sds' bus='scsi'/> + <address type='drive' controller='20' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='sdt' bus='scsi'/> + <address type='drive' controller='21' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='sdu' bus='scsi'/> + <address type='drive' controller='22' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='sdv' bus='scsi'/> + <address type='drive' controller='23' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='sdw' bus='scsi'/> + <address type='drive' controller='24' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='sdx' bus='scsi'/> + <address type='drive' controller='25' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='sdy' bus='scsi'/> + <address 
type='drive' controller='26' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='sdz' bus='scsi'/> + <address type='drive' controller='27' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='sdaa' bus='scsi'/> + <address type='drive' controller='28' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='sdab' bus='scsi'/> + <address type='drive' controller='29' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='sdac' bus='scsi'/> + <address type='drive' controller='30' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='sdad' bus='scsi'/> + <address type='drive' controller='31' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='sdae' bus='scsi'/> + <address type='drive' controller='32' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='sdaf' bus='scsi'/> + <address type='drive' controller='33' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='sdag' bus='scsi'/> + <address type='drive' controller='34' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='sdah' bus='scsi'/> + <address type='drive' controller='35' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='sdai' bus='scsi'/> + <address type='drive' controller='36' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='sdaj' bus='scsi'/> + <address type='drive' controller='37' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='sdak' bus='scsi'/> + <address type='drive' controller='38' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='sdal' bus='scsi'/> + 
<address type='drive' controller='39' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='sdam' bus='scsi'/> + <address type='drive' controller='40' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='sdan' bus='scsi'/> + <address type='drive' controller='41' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='sdao' bus='scsi'/> + <address type='drive' controller='42' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='sdap' bus='scsi'/> + <address type='drive' controller='43' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data43'/> + <target dev='sdaq' bus='scsi'/> + <address type='drive' controller='44' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='sdar' bus='scsi'/> + <address type='drive' controller='45' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='sdas' bus='scsi'/> + <address type='drive' controller='46' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='sdat' bus='scsi'/> + <address type='drive' controller='47' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='sdau' bus='scsi'/> + <address type='drive' controller='48' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='sdav' bus='scsi'/> + <address type='drive' controller='49' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='sdaw' bus='scsi'/> + <address type='drive' controller='50' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='sdax' bus='scsi'/> + <address type='drive' controller='51' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='sday' 
bus='scsi'/> + <address type='drive' controller='52' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='sdaz' bus='scsi'/> + <address type='drive' controller='53' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='sdba' bus='scsi'/> + <address type='drive' controller='54' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='sdbb' bus='scsi'/> + <address type='drive' controller='55' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='sdbc' bus='scsi'/> + <address type='drive' controller='56' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='sdbd' bus='scsi'/> + <address type='drive' controller='57' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='sdbe' bus='scsi'/> + <address type='drive' controller='58' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='sdbf' bus='scsi'/> + <address type='drive' controller='59' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='sdbg' bus='scsi'/> + <address type='drive' controller='60' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='sdbh' bus='scsi'/> + <address type='drive' controller='61' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='sdbi' bus='scsi'/> + <address type='drive' controller='62' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='sdbj' bus='scsi'/> + <address type='drive' controller='63' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='sdbk' bus='scsi'/> + <address type='drive' controller='64' bus='0' target='0' unit='0'/> + </disk> + <controller type='scsi' index='0'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='1' 
model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='2' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='3' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='4' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='5' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='6' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='7' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='8' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='9' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='10' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='11' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='12' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='13' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='14' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='15' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='16' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='17' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='18' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='19' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='20' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='21' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='22' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='23' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='24' model='virtio-scsi'> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='25' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='26' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='27' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='28' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='29' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </controller> + <controller type='scsi' index='30' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </controller> + <controller type='scsi' index='31' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='32' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='33' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='34' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='35' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='36' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='37' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='38' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='39' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='40' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='41' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='42' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='43' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='44' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='45' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='46' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='47' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' 
slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='48' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='49' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='50' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='51' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='52' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='53' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='54' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='55' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='56' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='57' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='58' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='59' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='60' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='61' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='62' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='63' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='64' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <controller type='pci' index='1' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' +function='0x0'/> + </controller> + <controller type='pci' index='2' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='2'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1f' +function='0x0'/> + </controller> + </devices> + +vm disks xml (only virtio disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' 
+discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='vdb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='vdc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='vdd' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='vde' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='vdf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='vdg' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='vdh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='vdi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='vdj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='vdk' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='vdl' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='vdm' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='vdn' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x15' 
+function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='vdo' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='vdp' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='vdq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='vdr' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='vds' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='vdt' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='vdu' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='vdv' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='vdw' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='vdx' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='vdy' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='vdz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' 
+discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='vdaa' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='vdab' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='vdac' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='vdad' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='vdae' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='vdaf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='vdag' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='vdah' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='vdai' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='vdaj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='vdak' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='vdal' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='vdam' bus='virtio'/> + <address type='pci' domain='0x0000' 
bus='0x01' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='vdan' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='vdao' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='vdap' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data43'/> + <target dev='vdaq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='vdar' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='vdas' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='vdat' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='vdau' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='vdav' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='vdaw' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='vdax' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='vday' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' 
cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='vdaz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='vdba' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='vdbb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='vdbc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='vdbd' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='vdbe' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='vdbf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='vdbg' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='vdbh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='vdbi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='vdbj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='vdbk' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='vdbl' bus='virtio'/> + 
<address type='pci' domain='0x0000' bus='0x00' slot='0x03'
+function='0x0'/>
+      </disk>
+    <controller type='pci' index='0' model='pci-root'/>
+    <controller type='pci' index='1' model='pci-bridge'>
+      <model name='pci-bridge'/>
+      <target chassisNr='1'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x1f'
+function='0x0'/>
+    </controller>
+    <controller type='pci' index='2' model='pci-bridge'>
+      <model name='pci-bridge'/>
+      <target chassisNr='2'/>
+      <address type='pci' domain='0x0000' bus='0x01' slot='0x1f'
+function='0x0'/>
+    </controller>
+  </devices>
+
+> > (3) migrate vm and vm's disks
+>
+> What do you mean by 'and vm disks' - are you doing a block migration?
+
+Yes, block migration.
+In fact, migrating only the domain (without block migration) also
+reproduces the problem.
+
+> Dave
+>
+> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
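+
+For anyone trying to reproduce this, a typical libvirt block-migration
+invocation is sketched below. The domain name (vm-tempp) and the
+destination URI are placeholders, and the exact flags are an assumption,
+since the report does not show the command that was actually used:
+
+    # Block migration: the guest's disk images are copied to the
+    # destination as part of the live migration (no shared storage
+    # needed). Domain name and destination host are placeholders.
+    virsh migrate --live --copy-storage-all --verbose vm-tempp \
+        qemu+ssh://dest-host/system
+
+    # Plain live migration, shared storage assumed; per the reporter,
+    # this also reproduces the problem.
+    virsh migrate --live --verbose vm-tempp qemu+ssh://dest-host/system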