diff options
Diffstat (limited to 'results/classifier/009/PID')
| -rw-r--r-- | results/classifier/009/PID/11933524 | 1135 |
| -rw-r--r-- | results/classifier/009/PID/70294255 | 1071 |
2 files changed, 2206 insertions, 0 deletions
diff --git a/results/classifier/009/PID/11933524 b/results/classifier/009/PID/11933524 new file mode 100644 index 00000000..c8313c02 --- /dev/null +++ b/results/classifier/009/PID/11933524 @@ -0,0 +1,1135 @@ +PID: 0.791 +other: 0.771 +device: 0.762 +permissions: 0.752 +debug: 0.752 +socket: 0.751 +boot: 0.743 +graphic: 0.737 +performance: 0.736 +vnc: 0.695 +KVM: 0.689 +semantic: 0.673 +network: 0.662 +files: 0.660 + +[BUG] hw/i386/pc.c: CXL Fixed Memory Window should not reserve e820 in bios + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. + +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. 
+ +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. + +Signed-off-by: Gregory Price <gregory.price@memverge.com> +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; + + cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } +-- +2.37.3 + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. 
+ +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. + +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. 
+ +Signed-off-by: Gregory Price <gregory.price@memverge.com> +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; +cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +Or will this be subregion of cxl_base? + +Thanks, +Pankaj +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + +> +> - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> memory_region_add_subregion(system_memory, cxl_base, mr); +> +> cxl_resv_end = cxl_base + cxl_size; +> +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> memory_region_init_io(&fw->mr, OBJECT(machine), +> +> &cfmws_ops, fw, +> +> "cxl-fixed-memory-region", +> +> fw->size); +> +> memory_region_add_subregion(system_memory, fw->base, +> +> &fw->mr); +> +> +Or will this be subregion of cxl_base? +> +> +Thanks, +> +Pankaj +The memory region backing this memory area still has to be initialized +and added in the QEMU system, but it will now be initialized for use by +linux after PCI/ACPI setup occurs and the CXL driver discovers it via +CDAT. + +It's also still possible to assign this area a static memory region at +bool by setting up the SRATs in the ACPI tables, but that patch is not +upstream yet. 
+ +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price <gourry.memverge@gmail.com> wrote: +> +> +Early-boot e820 records will be inserted by the bios/efi/early boot +> +software and be reported to the kernel via insert_resource. Later, when +> +CXL drivers iterate through the regions again, they will insert another +> +resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? + +> +> +This RESERVED memory area causes the memory region to become unusable, +> +and as a result attempting to create memory regions with +> +> +`cxl create-region ...` +> +> +Will fail due to the RESERVED area intersecting with the CXL window. +> +> +> +During boot the following traceback is observed: +> +> +0xffffffff81101650 in insert_resource_expand_to_fit () +> +0xffffffff83d964c5 in e820__reserve_resources_late () +> +0xffffffff83e03210 in pcibios_resource_survey () +> +0xffffffff83e04f4a in pcibios_init () +> +> +Which produces a call to reserve the CFMWS area: +> +> +(gdb) p *new +> +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +Later the Kernel parses ACPI tables and reserves the exact same area as +> +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> +retains the RESERVED region and makes it a child of the new region. 
+> +> +0xffffffff811016a4 in insert_resource_conflict () +> +insert_resource () +> +0xffffffff81a81389 in cxl_parse_cfmws () +> +0xffffffff818c4a81 in call_handler () +> +acpi_parse_entries_array () +> +> +(gdb) p/x *new +> +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +This produces the following output in /proc/iomem: +> +> +590000000-68fffffff : CXL Window 0 +> +590000000-68fffffff : Reserved +> +> +This reserved area causes `get_free_mem_region()` to fail due to a check +> +against `__region_intersects()`. Due to this reserved area, the +> +intersect check will only ever return REGION_INTERSECTS, which causes +> +`cxl create-region` to always fail. +> +> +Signed-off-by: Gregory Price <gregory.price@memverge.com> +> +--- +> +hw/i386/pc.c | 2 -- +> +1 file changed, 2 deletions(-) +> +> +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +index 566accf7e6..5bf5465a21 100644 +> +--- a/hw/i386/pc.c +> ++++ b/hw/i386/pc.c +> +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +hwaddr cxl_size = MiB; +> +> +cxl_base = pc_get_cxl_range_start(pcms); +> +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +memory_region_add_subregion(system_memory, cxl_base, mr); +> +cxl_resv_end = cxl_base + cxl_size; +> +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +> +fw, +> +"cxl-fixed-memory-region", fw->size); +> +memory_region_add_subregion(system_memory, fw->base, +> +&fw->mr); +> +- e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +cxl_fmw_base += fw->size; +> +cxl_resv_end = cxl_fmw_base; +> +} +> +-- +> +2.37.3 +> + +This patch does not resolve the issue, reserved entries are still created. 
+[  0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +[  0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +# cat /proc/iomem +290000000-29fffffff : CXL Window 0 + 290000000-29fffffff : Reserved +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +cxl region: create_region: region0: set_size failed: Numerical result out of range +cxl region: cmd_create_region: created 0 regions +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha < +ani@anisinha.ca +> wrote: +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price < +gourry.memverge@gmail.com +> wrote: +> +> Early-boot e820 records will be inserted by the bios/efi/early boot +> software and be reported to the kernel via insert_resource. Later, when +> CXL drivers iterate through the regions again, they will insert another +> resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? +> +> This RESERVED memory area causes the memory region to become unusable, +> and as a result attempting to create memory regions with +> +>   `cxl create-region ...` +> +> Will fail due to the RESERVED area intersecting with the CXL window. 
+> +> +> During boot the following traceback is observed: +> +> 0xffffffff81101650 in insert_resource_expand_to_fit () +> 0xffffffff83d964c5 in e820__reserve_resources_late () +> 0xffffffff83e03210 in pcibios_resource_survey () +> 0xffffffff83e04f4a in pcibios_init () +> +> Which produces a call to reserve the CFMWS area: +> +> (gdb) p *new +> $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +>    flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +>    child = 0x0} +> +> Later the Kernel parses ACPI tables and reserves the exact same area as +> the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> retains the RESERVED region and makes it a child of the new region. +> +> 0xffffffff811016a4 in insert_resource_conflict () +>            insert_resource () +> 0xffffffff81a81389 in cxl_parse_cfmws () +> 0xffffffff818c4a81 in call_handler () +>            acpi_parse_entries_array () +> +> (gdb) p/x *new +> $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +>    flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +>    child = 0x0} +> +> This produces the following output in /proc/iomem: +> +> 590000000-68fffffff : CXL Window 0 +>  590000000-68fffffff : Reserved +> +> This reserved area causes `get_free_mem_region()` to fail due to a check +> against `__region_intersects()`. Due to this reserved area, the +> intersect check will only ever return REGION_INTERSECTS, which causes +> `cxl create-region` to always fail. 
+> +> Signed-off-by: Gregory Price < +gregory.price@memverge.com +> +> --- +> hw/i386/pc.c | 2 -- +> 1 file changed, 2 deletions(-) +> +> diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> index 566accf7e6..5bf5465a21 100644 +> --- a/hw/i386/pc.c +> +++ b/hw/i386/pc.c +> @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +>     hwaddr cxl_size = MiB; +> +>     cxl_base = pc_get_cxl_range_start(pcms); +> -    e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +>     memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +>     memory_region_add_subregion(system_memory, cxl_base, mr); +>     cxl_resv_end = cxl_base + cxl_size; +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +>         memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, +>                    "cxl-fixed-memory-region", fw->size); +>         memory_region_add_subregion(system_memory, fw->base, &fw->mr); +> -        e820_add_entry(fw->base, fw->size, E820_RESERVED); +>         cxl_fmw_base += fw->size; +>         cxl_resv_end = cxl_fmw_base; +>       } +> -- +> 2.37.3 +> + ++Gerd Hoffmann + +On Tue, Oct 18, 2022 at 8:16 PM Gregory Price <gourry.memverge@gmail.com> wrote: +> +> +This patch does not resolve the issue, reserved entries are still created. 
+> +> +[ 0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +> +[ 0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +> +> +# cat /proc/iomem +> +290000000-29fffffff : CXL Window 0 +> +290000000-29fffffff : Reserved +> +> +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +> +cxl region: create_region: region0: set_size failed: Numerical result out of +> +range +> +cxl region: cmd_create_region: created 0 regions +> +> +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha <ani@anisinha.ca> wrote: +> +> +> +> On Tue, Oct 18, 2022 at 5:14 AM Gregory Price <gourry.memverge@gmail.com> +> +> wrote: +> +> > +> +> > Early-boot e820 records will be inserted by the bios/efi/early boot +> +> > software and be reported to the kernel via insert_resource. Later, when +> +> > CXL drivers iterate through the regions again, they will insert another +> +> > resource and make the RESERVED memory area a child. +> +> +> +> I have already sent a patch +> +> +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +> +> When the patch is applied, there would not be any reserved entries +> +> even with passing E820_RESERVED . +> +> So this patch needs to be evaluated in the light of the above patch I +> +> sent. Once you apply my patch, does the issue still exist? +> +> +> +> > +> +> > This RESERVED memory area causes the memory region to become unusable, +> +> > and as a result attempting to create memory regions with +> +> > +> +> > `cxl create-region ...` +> +> > +> +> > Will fail due to the RESERVED area intersecting with the CXL window. 
+> +> > +> +> > +> +> > During boot the following traceback is observed: +> +> > +> +> > 0xffffffff81101650 in insert_resource_expand_to_fit () +> +> > 0xffffffff83d964c5 in e820__reserve_resources_late () +> +> > 0xffffffff83e03210 in pcibios_resource_survey () +> +> > 0xffffffff83e04f4a in pcibios_init () +> +> > +> +> > Which produces a call to reserve the CFMWS area: +> +> > +> +> > (gdb) p *new +> +> > $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +> > flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > Later the Kernel parses ACPI tables and reserves the exact same area as +> +> > the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> +> > retains the RESERVED region and makes it a child of the new region. +> +> > +> +> > 0xffffffff811016a4 in insert_resource_conflict () +> +> > insert_resource () +> +> > 0xffffffff81a81389 in cxl_parse_cfmws () +> +> > 0xffffffff818c4a81 in call_handler () +> +> > acpi_parse_entries_array () +> +> > +> +> > (gdb) p/x *new +> +> > $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +> > flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > This produces the following output in /proc/iomem: +> +> > +> +> > 590000000-68fffffff : CXL Window 0 +> +> > 590000000-68fffffff : Reserved +> +> > +> +> > This reserved area causes `get_free_mem_region()` to fail due to a check +> +> > against `__region_intersects()`. Due to this reserved area, the +> +> > intersect check will only ever return REGION_INTERSECTS, which causes +> +> > `cxl create-region` to always fail. 
+> +> > +> +> > Signed-off-by: Gregory Price <gregory.price@memverge.com> +> +> > --- +> +> > hw/i386/pc.c | 2 -- +> +> > 1 file changed, 2 deletions(-) +> +> > +> +> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > index 566accf7e6..5bf5465a21 100644 +> +> > --- a/hw/i386/pc.c +> +> > +++ b/hw/i386/pc.c +> +> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > hwaddr cxl_size = MiB; +> +> > +> +> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> > memory_region_add_subregion(system_memory, cxl_base, mr); +> +> > cxl_resv_end = cxl_base + cxl_size; +> +> > @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > memory_region_init_io(&fw->mr, OBJECT(machine), +> +> > &cfmws_ops, fw, +> +> > "cxl-fixed-memory-region", +> +> > fw->size); +> +> > memory_region_add_subregion(system_memory, fw->base, +> +> > &fw->mr); +> +> > - e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +> > cxl_fmw_base += fw->size; +> +> > cxl_resv_end = cxl_fmw_base; +> +> > } +> +> > -- +> +> > 2.37.3 +> +> > + +> +>> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +>> > index 566accf7e6..5bf5465a21 100644 +> +>> > --- a/hw/i386/pc.c +> +>> > +++ b/hw/i386/pc.c +> +>> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +>> > hwaddr cxl_size = MiB; +> +>> > +> +>> > cxl_base = pc_get_cxl_range_start(pcms); +> +>> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +Just dropping it doesn't look like a good plan to me. + +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +window is placed above that address, i.e. this effectively reserves +address space. Right now used by memory hotplug code, but should work +for cxl too I think (disclaimer: don't know much about cxl ...). 
+ +take care & HTH, + Gerd + +On Tue, 8 Nov 2022 12:21:11 +0100 +Gerd Hoffmann <kraxel@redhat.com> wrote: + +> +> >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> >> > index 566accf7e6..5bf5465a21 100644 +> +> >> > --- a/hw/i386/pc.c +> +> >> > +++ b/hw/i386/pc.c +> +> >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> >> > hwaddr cxl_size = MiB; +> +> >> > +> +> >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +Just dropping it doesn't look like a good plan to me. +> +> +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +window is placed above that address, i.e. this effectively reserves +> +address space. Right now used by memory hotplug code, but should work +> +for cxl too I think (disclaimer: don't know much about cxl ...). +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +at all, it' has its own mapping. + +Regardless of that, reserved E820 entries look wrong, and looking at +commit message OS is right to bailout on them (expected according +to ACPI spec). +Also spec says + +" +E820 Assumptions and Limitations + [...] + The platform boot firmware does not return a range description for the memory +mapping of + PCI devices, ISA Option ROMs, and ISA Plug and Play cards because the OS has +mechanisms + available to detect them. +" + +so dropping reserved entries looks reasonable from ACPI spec point of view. +(disclaimer: don't know much about cxl ... 
either) +> +> +take care & HTH, +> +Gerd +> + +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +On Tue, 8 Nov 2022 12:21:11 +0100 +> +Gerd Hoffmann <kraxel@redhat.com> wrote: +> +> +> > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > >> > index 566accf7e6..5bf5465a21 100644 +> +> > >> > --- a/hw/i386/pc.c +> +> > >> > +++ b/hw/i386/pc.c +> +> > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > >> > hwaddr cxl_size = MiB; +> +> > >> > +> +> > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +> +> Just dropping it doesn't look like a good plan to me. +> +> +> +> You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> window is placed above that address, i.e. this effectively reserves +> +> address space. Right now used by memory hotplug code, but should work +> +> for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +at all, it' has its own mapping. +This should be changed. cxl should make sure the highest address used +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +resources there. + +> +so dropping reserved entries looks reasonable from ACPI spec point of view. +Yep, I don't want dispute that. + +I suspect the reason for these entries to exist in the first place is to +inform the firmware that it should not place stuff there, and if we +remove that to conform with the spec we need some alternative way for +that ... 
+ +take care, + Gerd + +On Fri, 11 Nov 2022 12:40:59 +0100 +Gerd Hoffmann <kraxel@redhat.com> wrote: + +> +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +> On Tue, 8 Nov 2022 12:21:11 +0100 +> +> Gerd Hoffmann <kraxel@redhat.com> wrote: +> +> +> +> > > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > > >> > index 566accf7e6..5bf5465a21 100644 +> +> > > >> > --- a/hw/i386/pc.c +> +> > > >> > +++ b/hw/i386/pc.c +> +> > > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > > >> > hwaddr cxl_size = MiB; +> +> > > >> > +> +> > > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > +> +> > Just dropping it doesn't look like a good plan to me. +> +> > +> +> > You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> > (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> > window is placed above that address, i.e. this effectively reserves +> +> > address space. Right now used by memory hotplug code, but should work +> +> > for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +> +> As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +> at all, it' has its own mapping. +> +> +This should be changed. cxl should make sure the highest address used +> +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +> +resources there. +if (pcmc->has_reserved_memory && machine->device_memory->base) { + +[...] + + if (pcms->cxl_devices_state.is_enabled) { + + res_mem_end = cxl_resv_end; + +that should be handled by this line + + } + + *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); + + fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); + + } + +so SeaBIOS shouldn't intrude into CXL address space +(I assume EDK2 behave similarly here) + +> +> so dropping reserved entries looks reasonable from ACPI spec point of view. 
+> +> +> +> +Yep, I don't want dispute that. +> +> +I suspect the reason for these entries to exist in the first place is to +> +inform the firmware that it should not place stuff there, and if we +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +just to educate me, can you point out what SeaBIOS code does with reservations. + +> +remove that to conform with the spec we need some alternative way for +> +that ... +with etc/reserved-memory-end set as above, +is E820_RESERVED really needed here? + +(my understanding was that E820_RESERVED weren't accounted for when +initializing PCI devices) + +> +> +take care, +> +Gerd +> + +> +if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +[...] +> +> +if (pcms->cxl_devices_state.is_enabled) { +> +> +res_mem_end = cxl_resv_end; +> +> +that should be handled by this line +> +> +} +> +> +*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +sizeof(*val)); +> +} +> +> +so SeaBIOS shouldn't intrude into CXL address space +Yes, looks good, so with this in place already everyting should be fine. + +> +(I assume EDK2 behave similarly here) +Correct, ovmf reads that fw_cfg file too. + +> +> I suspect the reason for these entries to exist in the first place is to +> +> inform the firmware that it should not place stuff there, and if we +> +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +just to educate me, can you point out what SeaBIOS code does with +> +reservations. +They are added to the e820 map which gets passed on to the OS. seabios +uses (and updateas) the e820 map too, when allocating memory for +example. While thinking about it I'm not fully sure it actually looks +at reservations, maybe it only uses (and updates) ram entries when +allocating memory. + +> +> remove that to conform with the spec we need some alternative way for +> +> that ... +> +> +with etc/reserved-memory-end set as above, +> +is E820_RESERVED really needed here? +No. 
Setting etc/reserved-memory-end is enough. + +So for the original patch: +Acked-by: Gerd Hoffmann <kraxel@redhat.com> + +take care, + Gerd + +On Fri, Nov 11, 2022 at 02:36:02PM +0100, Gerd Hoffmann wrote: +> +> if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +> +> [...] +> +> +> +> if (pcms->cxl_devices_state.is_enabled) { +> +> +> +> res_mem_end = cxl_resv_end; +> +> +> +> that should be handled by this line +> +> +> +> } +> +> +> +> *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +> +> fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +> sizeof(*val)); +> +> } +> +> +> +> so SeaBIOS shouldn't intrude into CXL address space +> +> +Yes, looks good, so with this in place already everyting should be fine. +> +> +> (I assume EDK2 behave similarly here) +> +> +Correct, ovmf reads that fw_cfg file too. +> +> +> > I suspect the reason for these entries to exist in the first place is to +> +> > inform the firmware that it should not place stuff there, and if we +> +> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +> just to educate me, can you point out what SeaBIOS code does with +> +> reservations. +> +> +They are added to the e820 map which gets passed on to the OS. seabios +> +uses (and updateas) the e820 map too, when allocating memory for +> +example. While thinking about it I'm not fully sure it actually looks +> +at reservations, maybe it only uses (and updates) ram entries when +> +allocating memory. +> +> +> > remove that to conform with the spec we need some alternative way for +> +> > that ... +> +> +> +> with etc/reserved-memory-end set as above, +> +> is E820_RESERVED really needed here? +> +> +No. Setting etc/reserved-memory-end is enough. +> +> +So for the original patch: +> +Acked-by: Gerd Hoffmann <kraxel@redhat.com> +> +> +take care, +> +Gerd +It's upstream already, sorry I can't add your tag. 
+ +-- +MST + diff --git a/results/classifier/009/PID/70294255 b/results/classifier/009/PID/70294255 new file mode 100644 index 00000000..04cebede --- /dev/null +++ b/results/classifier/009/PID/70294255 @@ -0,0 +1,1071 @@ +PID: 0.859 +semantic: 0.858 +socket: 0.858 +device: 0.857 +graphic: 0.857 +debug: 0.854 +permissions: 0.854 +other: 0.852 +performance: 0.850 +network: 0.846 +vnc: 0.837 +files: 0.832 +boot: 0.811 +KVM: 0.806 + +[Qemu-devel] 答复: Re: 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang + +hi: + +yes.it is better. + +And should we delete + + + + +#ifdef WIN32 + + QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL) + +#endif + + + + +in qio_channel_socket_accept? + +qio_channel_socket_new already have it. + + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人:王广10165992 +抄送人: address@hidden address@hidden address@hidden address@hidden +日 期 :2017年03月22日 15:03 +主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang + + + + + +Hi, + +On 2017/3/22 9:42, address@hidden wrote: +> diff --git a/migration/socket.c b/migration/socket.c +> +> +> index 13966f1..d65a0ea 100644 +> +> +> --- a/migration/socket.c +> +> +> +++ b/migration/socket.c +> +> +> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +> +> +> } +> +> +> +> +> +> trace_migration_socket_incoming_accepted() +> +> +> +> +> +> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +> +> +> + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) +> +> +> migration_channel_process_incoming(migrate_get_current(), +> +> +> QIO_CHANNEL(sioc)) +> +> +> object_unref(OBJECT(sioc)) +> +> +> +> +> Is this patch ok?
+> + +Yes, i think this works, but a better way maybe to call +qio_channel_set_feature() +in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the +socket accept fd, +Or fix it by this: + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index f546c68..ce6894c 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp) + { + QIOChannelSocket *cioc +- +- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)) +- cioc->fd = -1 ++ ++ cioc = qio_channel_socket_new() + cioc->remoteAddrLen = sizeof(ioc->remoteAddr) + cioc->localAddrLen = sizeof(ioc->localAddr) + + +Thanks, +Hailiang + +> I have test it . The test could not hang any more. +> +> +> +> +> +> +> +> +> +> +> +> +> 原始邮件 +> +> +> +> 发件人: address@hidden +> 收件人: address@hidden address@hidden +> 抄送人: address@hidden address@hidden address@hidden +> 日 期 :2017年03月22日 09:11 +> 主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang +> +> +> +> +> +> On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +> > * Hailiang Zhang (address@hidden) wrote: +> >> Hi, +> >> +> >> Thanks for reporting this, and i confirmed it in my test, and it is a bug. +> >> +> >> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +> >> case COLO thread/incoming thread is stuck in read/write() while do +failover, +> >> but it didn't take effect, because all the fd used by COLO (also migration) +> >> has been wrapped by qio channel, and it will not call the shutdown API if +> >> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +> >> +> >> Cc: Dr. David Alan Gilbert address@hidden +> >> +> >> I doubted migration cancel has the same problem, it may be stuck in write() +> >> if we tried to cancel migration.
+ï¼ ï¼ï¼ +ï¼ ï¼ï¼ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +ï¼ ï¼ï¼ { +ï¼ ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +ï¼ ï¼ï¼ migration_channel_connect(s, ioc, NULL) +ï¼ ï¼ï¼ ... ... +ï¼ ï¼ï¼ We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +ï¼ ï¼ï¼ and the +ï¼ ï¼ï¼ migrate_fd_cancel() +ï¼ ï¼ï¼ { +ï¼ ï¼ï¼ ... ... +ï¼ ï¼ï¼ if (s-ï¼state == MIGRATION_STATUS_CANCELLING && f) { +ï¼ ï¼ï¼ qemu_file_shutdown(f) --ï¼ This will not take effect. No ? +ï¼ ï¼ï¼ } +ï¼ ï¼ï¼ } +ï¼ ï¼ +ï¼ ï¼ (cc'd in Daniel Berrange). +ï¼ ï¼ I see that we call qio_channel_set_feature(ioc, +QIO_CHANNEL_FEATURE_SHUTDOWN) at the +ï¼ ï¼ top of qio_channel_socket_new so I think that's safe isn't it? +ï¼ ï¼ +ï¼ +ï¼ Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. +ï¼ +ï¼ ï¼ Dave +ï¼ ï¼ +ï¼ ï¼ï¼ Thanks, +ï¼ ï¼ï¼ Hailiang +ï¼ ï¼ï¼ +ï¼ ï¼ï¼ On 2017/3/21 16:10, address@hidden wrote: +ï¼ ï¼ï¼ï¼ Thank youã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ I have test areadyã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ When the Primary Node panic,the Secondary Node qemu hang at the same +placeã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Incorrding +http://wiki.qemu-project.org/Features/COLO +ï¼kill Primary Node +qemu will not produce the problem,but Primary Node panic canã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ when failover,channel_shutdown could not shut down the channel. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ so the colo_process_incoming_thread will hang at recvmsg. 
+ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ I test a patch: +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ diff --git a/migration/socket.c b/migration/socket.c +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ index 13966f1..d65a0ea 100644 +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ --- a/migration/socket.c +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +++ b/migration/socket.c +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ } +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ trace_migration_socket_incoming_accepted() +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), +"migration-socket-incoming") +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ migration_channel_process_incoming(migrate_get_current(), +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ QIO_CHANNEL(sioc)) +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ object_unref(OBJECT(sioc)) +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ My test will not hang any more. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ åå§é®ä»¶ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ åä»¶äººï¼ address@hidden +ï¼ ï¼ï¼ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden +ï¼ ï¼ï¼ï¼ æéäººï¼ address@hidden address@hidden +ï¼ ï¼ï¼ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +ï¼ ï¼ï¼ï¼ 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Hi,Wang. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ You can test this branch: +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ and please follow wiki ensure your own configuration correctly. 
+ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +http://wiki.qemu-project.org/Features/COLO +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Thanks +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Zhang Chen +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ hi. +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ I test the git qemu master have the same problem. +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) bt +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ ï¼ï¼ï¼ ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ ï¼ï¼ï¼ ï¼ (address@hidden, address@hidden "", +ï¼ ï¼ï¼ï¼ ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ï¼ï¼ ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file.c:295 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file.c:568 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file.c:648 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) at migration/colo.c:244 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ ï¼ï¼ï¼ ï¼ outï¼, address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ at migration/colo.c:264 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ ï¼ï¼ï¼ ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at 
migration/colo.c:577 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $3 = 0 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) bt +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ ï¼ï¼ï¼ ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +ï¼ ï¼ï¼ï¼ ï¼ gmain.c:3054 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) at gmain.c:3630 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ ï¼ï¼ï¼ ï¼ util/main-loop.c:258 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #5 main_loop_wait (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ util/main-loop.c:506 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ ï¼ï¼ï¼ ï¼ outï¼) at vl.c:4709 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $1 = 6 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ May be socket_accept_incoming_migration should +ï¼ ï¼ï¼ï¼ ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ thank you. 
+ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ åå§é®ä»¶ +ï¼ ï¼ï¼ï¼ ï¼ address@hidden +ï¼ ï¼ï¼ï¼ ï¼ address@hidden +ï¼ ï¼ï¼ï¼ ï¼ address@hidden@huawei.comï¼ +ï¼ ï¼ï¼ï¼ ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ ï¼ï¼ï¼ ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ ï¼ï¼ï¼ ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ ï¼ï¼ï¼ ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ ï¼ï¼ï¼ ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Do the colo have any plan for development? +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ In our internal version can run it successfully, +ï¼ ï¼ï¼ï¼ ï¼ The failover detail you can ask Zhanghailiang for help. 
+ï¼ ï¼ï¼ï¼ ï¼ Next time if you have some question about COLO, +ï¼ ï¼ï¼ï¼ ï¼ please cc me and zhanghailiang address@hidden +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ Thanks +ï¼ ï¼ï¼ï¼ ï¼ Zhang Chen +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ (gdb) bt +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized +outï¼, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, +errp=0x0) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ io/channel-socket.c:497 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden "", address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:257 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:523 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:603 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/colo.c:215 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message +(errp=0x7f3d62bfaa48, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:546 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ 
migration/colo.c:649 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ -- +ï¼ ï¼ï¼ï¼ ï¼ ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ -- +ï¼ ï¼ï¼ï¼ ï¼ Thanks +ï¼ ï¼ï¼ï¼ ï¼ Zhang Chen +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ +ï¼ ï¼ -- +ï¼ ï¼ Dr. David Alan Gilbert / address@hidden / Manchester, UK +ï¼ ï¼ +ï¼ ï¼ . +ï¼ ï¼ +ï¼ + +On 2017/3/22 16:09, address@hidden wrote: +hi: + +yes.it is better. + +And should we delete +Yes, you are right. +#ifdef WIN32 + + QIO_CHANNEL(cioc)-ï¼event = CreateEvent(NULL, FALSE, FALSE, NULL) + +#endif + + + + +in qio_channel_socket_acceptï¼ + +qio_channel_socket_new already have it. 
+ + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ç广10165992 +æéäººï¼ address@hidden address@hidden address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ22æ¥ 15:03 +主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: çå¤: Re: çå¤: Re: [BUG]COLO failover hang + + + + + +Hi, + +On 2017/3/22 9:42, address@hidden wrote: +ï¼ diff --git a/migration/socket.c b/migration/socket.c +ï¼ +ï¼ +ï¼ index 13966f1..d65a0ea 100644 +ï¼ +ï¼ +ï¼ --- a/migration/socket.c +ï¼ +ï¼ +ï¼ +++ b/migration/socket.c +ï¼ +ï¼ +ï¼ @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +ï¼ +ï¼ +ï¼ } +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ trace_migration_socket_incoming_accepted() +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +ï¼ +ï¼ +ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) +ï¼ +ï¼ +ï¼ migration_channel_process_incoming(migrate_get_current(), +ï¼ +ï¼ +ï¼ QIO_CHANNEL(sioc)) +ï¼ +ï¼ +ï¼ object_unref(OBJECT(sioc)) +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ Is this patch ok? +ï¼ + +Yes, i think this works, but a better way maybe to call +qio_channel_set_feature() +in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the +socket accept fd, +Or fix it by this: + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index f546c68..ce6894c 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp) + { + QIOChannelSocket *cioc +- +- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)) +- cioc-ï¼fd = -1 ++ ++ cioc = qio_channel_socket_new() + cioc-ï¼remoteAddrLen = sizeof(ioc-ï¼remoteAddr) + cioc-ï¼localAddrLen = sizeof(ioc-ï¼localAddr) + + +Thanks, +Hailiang + +ï¼ I have test it . The test could not hang any more. 
+ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ åå§é®ä»¶ +ï¼ +ï¼ +ï¼ +ï¼ åä»¶äººï¼ address@hidden +ï¼ æ¶ä»¶äººï¼ address@hidden address@hidden +ï¼ æéäººï¼ address@hidden address@hidden address@hidden +ï¼ æ¥ æ ï¼2017å¹´03æ22æ¥ 09:11 +ï¼ ä¸» é¢ ï¼Re: [Qemu-devel] çå¤: Re: çå¤: Re: [BUG]COLO failover hang +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +ï¼ ï¼ * Hailiang Zhang (address@hidden) wrote: +ï¼ ï¼ï¼ Hi, +ï¼ ï¼ï¼ +ï¼ ï¼ï¼ Thanks for reporting this, and i confirmed it in my test, and it is a bug. +ï¼ ï¼ï¼ +ï¼ ï¼ï¼ Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +ï¼ ï¼ï¼ case COLO thread/incoming thread is stuck in read/write() while do +failover, +ï¼ ï¼ï¼ but it didn't take effect, because all the fd used by COLO (also migration) +ï¼ ï¼ï¼ has been wrapped by qio channel, and it will not call the shutdown API if +ï¼ ï¼ï¼ we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +ï¼ ï¼ï¼ +ï¼ ï¼ï¼ Cc: Dr. David Alan Gilbert address@hidden +ï¼ ï¼ï¼ +ï¼ ï¼ï¼ I doubted migration cancel has the same problem, it may be stuck in write() +ï¼ ï¼ï¼ if we tried to cancel migration. +ï¼ ï¼ï¼ +ï¼ ï¼ï¼ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +ï¼ ï¼ï¼ { +ï¼ ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +ï¼ ï¼ï¼ migration_channel_connect(s, ioc, NULL) +ï¼ ï¼ï¼ ... ... +ï¼ ï¼ï¼ We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +ï¼ ï¼ï¼ and the +ï¼ ï¼ï¼ migrate_fd_cancel() +ï¼ ï¼ï¼ { +ï¼ ï¼ï¼ ... ... +ï¼ ï¼ï¼ if (s-ï¼state == MIGRATION_STATUS_CANCELLING && f) { +ï¼ ï¼ï¼ qemu_file_shutdown(f) --ï¼ This will not take effect. No ? +ï¼ ï¼ï¼ } +ï¼ ï¼ï¼ } +ï¼ ï¼ +ï¼ ï¼ (cc'd in Daniel Berrange). +ï¼ ï¼ I see that we call qio_channel_set_feature(ioc, +QIO_CHANNEL_FEATURE_SHUTDOWN) at the +ï¼ ï¼ top of qio_channel_socket_new so I think that's safe isn't it? 
+ï¼ ï¼ +ï¼ +ï¼ Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. +ï¼ +ï¼ ï¼ Dave +ï¼ ï¼ +ï¼ ï¼ï¼ Thanks, +ï¼ ï¼ï¼ Hailiang +ï¼ ï¼ï¼ +ï¼ ï¼ï¼ On 2017/3/21 16:10, address@hidden wrote: +ï¼ ï¼ï¼ï¼ Thank youã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ I have test areadyã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ When the Primary Node panic,the Secondary Node qemu hang at the same +placeã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Incorrding +http://wiki.qemu-project.org/Features/COLO +ï¼kill Primary Node +qemu will not produce the problem,but Primary Node panic canã +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ when failover,channel_shutdown could not shut down the channel. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ so the colo_process_incoming_thread will hang at recvmsg. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ I test a patch: +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ diff --git a/migration/socket.c b/migration/socket.c +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ index 13966f1..d65a0ea 100644 +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ --- a/migration/socket.c +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +++ b/migration/socket.c +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ } +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ trace_migration_socket_incoming_accepted() +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), +"migration-socket-incoming") +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ migration_channel_process_incoming(migrate_get_current(), +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ QIO_CHANNEL(sioc)) +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ object_unref(OBJECT(sioc)) +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ My test will not hang any more. 
+ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ åå§é®ä»¶ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ åä»¶äººï¼ address@hidden +ï¼ ï¼ï¼ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden +ï¼ ï¼ï¼ï¼ æéäººï¼ address@hidden address@hidden +ï¼ ï¼ï¼ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +ï¼ ï¼ï¼ï¼ 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Hi,Wang. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ You can test this branch: +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ and please follow wiki ensure your own configuration correctly. +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +http://wiki.qemu-project.org/Features/COLO +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Thanks +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ Zhang Chen +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ hi. +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ I test the git qemu master have the same problem. 
+ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) bt +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ ï¼ï¼ï¼ ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ ï¼ï¼ï¼ ï¼ (address@hidden, address@hidden "", +ï¼ ï¼ï¼ï¼ ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ï¼ï¼ ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file.c:295 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file.c:568 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ migration/qemu-file.c:648 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) at migration/colo.c:244 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ ï¼ï¼ï¼ ï¼ outï¼, address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ at migration/colo.c:264 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ ï¼ï¼ï¼ ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ 
ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $3 = 0 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) bt +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ ï¼ï¼ï¼ ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +ï¼ ï¼ï¼ï¼ ï¼ gmain.c:3054 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ ï¼ï¼ï¼ ï¼ address@hidden) at gmain.c:3630 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ ï¼ï¼ï¼ ï¼ util/main-loop.c:258 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #5 main_loop_wait (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ util/main-loop.c:506 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ ï¼ï¼ï¼ ï¼ outï¼) at vl.c:4709 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $1 = 6 +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ May be socket_accept_incoming_migration should +ï¼ ï¼ï¼ï¼ ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ thank you. 
+ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ åå§é®ä»¶ +ï¼ ï¼ï¼ï¼ ï¼ address@hidden +ï¼ ï¼ï¼ï¼ ï¼ address@hidden +ï¼ ï¼ï¼ï¼ ï¼ address@hidden@huawei.comï¼ +ï¼ ï¼ï¼ï¼ ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ ï¼ï¼ï¼ ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ ï¼ï¼ï¼ ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ ï¼ï¼ï¼ ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ ï¼ï¼ï¼ ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Do the colo have any plan for development? +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ In our internal version can run it successfully, +ï¼ ï¼ï¼ï¼ ï¼ The failover detail you can ask Zhanghailiang for help. 
+ï¼ ï¼ï¼ï¼ ï¼ Next time if you have some question about COLO, +ï¼ ï¼ï¼ï¼ ï¼ please cc me and zhanghailiang address@hidden +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ Thanks +ï¼ ï¼ï¼ï¼ ï¼ Zhang Chen +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ (gdb) bt +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized +outï¼, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, +errp=0x0) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ io/channel-socket.c:497 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden "", address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:257 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:523 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:603 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/colo.c:215 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message +(errp=0x7f3d62bfaa48, +ï¼ ï¼ï¼ï¼ ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:546 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ ï¼ï¼ï¼ ï¼ ï¼ 
migration/colo.c:649 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ -- +ï¼ ï¼ï¼ï¼ ï¼ ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ ï¼ï¼ï¼ ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ -- +ï¼ ï¼ï¼ï¼ ï¼ Thanks +ï¼ ï¼ï¼ï¼ ï¼ Zhang Chen +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ ï¼ +ï¼ ï¼ï¼ï¼ +ï¼ ï¼ï¼ +ï¼ ï¼ -- +ï¼ ï¼ Dr. David Alan Gilbert / address@hidden / Manchester, UK +ï¼ ï¼ +ï¼ ï¼ . +ï¼ ï¼ +ï¼ + |
