From e5634e2806195bee44407853c4bf8776f7abfa4f Mon Sep 17 00:00:00 2001 From: Christian Krinitsin Date: Sun, 1 Jun 2025 21:19:55 +0200 Subject: add the outputs of the first five revisions of the classifier --- classification_output/01/README.md | 4 - classification_output/01/instruction/0966902 | 39 - classification_output/01/instruction/11357571 | 47 + classification_output/01/instruction/11933524 | 1125 +++ classification_output/01/instruction/24190340 | 2056 ++++++ classification_output/01/instruction/26095107 | 158 + classification_output/01/instruction/2609717 | 4939 ------------- classification_output/01/instruction/2880487 | 187 - classification_output/01/instruction/33802194 | 4939 +++++++++++++ classification_output/01/instruction/3457423 | 40 - classification_output/01/instruction/42226390 | 187 + classification_output/01/instruction/50773216 | 110 + classification_output/01/instruction/51610399 | 308 + classification_output/01/instruction/55961334 | 39 + classification_output/01/instruction/5843372 | 2056 ------ classification_output/01/instruction/6117378 | 31 - classification_output/01/instruction/62179944 | 31 + classification_output/01/instruction/63565653 | 49 + classification_output/01/instruction/70868267 | 40 + classification_output/01/instruction/73660729 | 31 + classification_output/01/instruction/7647456 | 110 - classification_output/01/instruction/7658242 | 1125 --- classification_output/01/instruction/7733130 | 47 - classification_output/01/instruction/7960594 | 158 - classification_output/01/instruction/8019995 | 31 - classification_output/01/instruction/8566429 | 49 - classification_output/01/instruction/9818783 | 308 - classification_output/01/mistranslation/0247400 | 1486 ---- classification_output/01/mistranslation/1267916 | 1878 ----- classification_output/01/mistranslation/14887122 | 258 + classification_output/01/mistranslation/1693040 | 1061 --- classification_output/01/mistranslation/22219210 | 43 + classification_output/01/mistranslation/23270873 | 692 ++ classification_output/01/mistranslation/24930826 | 33 + classification_output/01/mistranslation/25842545 | 202 + classification_output/01/mistranslation/26430026 | 165 + classification_output/01/mistranslation/36568044 | 4581 ++++++++++++ classification_output/01/mistranslation/3886413 | 33 - classification_output/01/mistranslation/4158985 | 1480 ---- classification_output/01/mistranslation/4412535 | 348 - classification_output/01/mistranslation/5373318 | 692 -- classification_output/01/mistranslation/5798945 | 43 - classification_output/01/mistranslation/5933279 | 4581 ------------ classification_output/01/mistranslation/6178292 | 258 - classification_output/01/mistranslation/64322995 | 54 + classification_output/01/mistranslation/6866700 | 54 - classification_output/01/mistranslation/70294255 | 1061 +++ classification_output/01/mistranslation/71456293 | 1486 ++++ classification_output/01/mistranslation/74466963 | 1878 +++++ classification_output/01/mistranslation/74545755 | 344 + classification_output/01/mistranslation/7711787 | 165 - classification_output/01/mistranslation/80604314 | 1480 ++++ classification_output/01/mistranslation/80615920 | 348 + classification_output/01/mistranslation/8720260 | 344 - classification_output/01/mistranslation/8874178 | 202 - classification_output/01/other/0001467 | 100 - classification_output/01/other/02364653 | 363 + classification_output/01/other/02572177 | 421 ++ classification_output/01/other/04472277 | 576 ++ classification_output/01/other/0804350 | 7448 
------------------- classification_output/01/other/1067127 | 154 - classification_output/01/other/1195866 | 242 - classification_output/01/other/12869209 | 88 + classification_output/01/other/13442371 | 369 + classification_output/01/other/1398669 | 785 --- classification_output/01/other/1412913 | 2900 -------- classification_output/01/other/14488057 | 711 ++ classification_output/01/other/16056596 | 98 + classification_output/01/other/16201167 | 100 + classification_output/01/other/16228234 | 1844 +++++ classification_output/01/other/17743720 | 771 ++ classification_output/01/other/21221931 | 328 + classification_output/01/other/21247035 | 1321 ++++ classification_output/01/other/2308923 | 235 - classification_output/01/other/23300761 | 313 + classification_output/01/other/23448582 | 265 + classification_output/01/other/2393649 | 344 - classification_output/01/other/2409210 | 418 -- classification_output/01/other/2537817 | 532 -- classification_output/01/other/2562302 | 149 - classification_output/01/other/25892827 | 1077 +++ classification_output/01/other/31349848 | 154 + classification_output/01/other/3223447 | 199 - classification_output/01/other/32484936 | 223 + classification_output/01/other/3501174 | 2793 -------- classification_output/01/other/35170175 | 521 ++ classification_output/01/other/3749377 | 363 - classification_output/01/other/3825088 | 521 -- classification_output/01/other/42613410 | 149 + classification_output/01/other/42974450 | 429 ++ classification_output/01/other/4314117 | 711 -- classification_output/01/other/43643137 | 538 ++ classification_output/01/other/4774720 | 328 - classification_output/01/other/4800759 | 369 - classification_output/01/other/48245039 | 530 ++ classification_output/01/other/4938208 | 1844 ----- classification_output/01/other/4970412 | 88 - classification_output/01/other/5057521 | 771 -- classification_output/01/other/5215275 | 538 -- classification_output/01/other/5321072 | 421 -- classification_output/01/other/5362491 | 98 - classification_output/01/other/5396868 | 223 - classification_output/01/other/5443005 | 576 -- classification_output/01/other/55247116 | 1310 ++++ classification_output/01/other/55367348 | 532 ++ classification_output/01/other/55753058 | 293 + classification_output/01/other/56309929 | 180 + classification_output/01/other/56937788 | 344 + classification_output/01/other/57195159 | 315 + classification_output/01/other/57231878 | 242 + classification_output/01/other/5745618 | 155 - classification_output/01/other/57756589 | 1421 ++++ classification_output/01/other/5912779 | 315 - classification_output/01/other/59540920 | 376 + classification_output/01/other/60339453 | 61 + classification_output/01/other/6156219 | 1421 ---- classification_output/01/other/6257722 | 716 -- classification_output/01/other/6355518 | 530 -- classification_output/01/other/6416205 | 61 - classification_output/01/other/64571620 | 785 +++ classification_output/01/other/6531392 | 293 - classification_output/01/other/65781993 | 2793 ++++++++ classification_output/01/other/66743673 | 364 + classification_output/01/other/6739993 | 265 - classification_output/01/other/67821138 | 199 + classification_output/01/other/68897003 | 716 ++ classification_output/01/other/6983580 | 429 -- classification_output/01/other/6998781 | 1077 --- classification_output/01/other/70021271 | 7448 +++++++++++++++++++ classification_output/01/other/70416488 | 1179 ++++ classification_output/01/other/7143139 | 126 - classification_output/01/other/7427991 | 313 - 
classification_output/01/other/74715356 | 126 + classification_output/01/other/7639274 | 1310 ---- classification_output/01/other/79834768 | 409 ++ classification_output/01/other/8109943 | 409 -- classification_output/01/other/81775929 | 235 + classification_output/01/other/85542195 | 120 + classification_output/01/other/8621822 | 376 - classification_output/01/other/8627146 | 364 - classification_output/01/other/8653736 | 120 - classification_output/01/other/8691137 | 180 - classification_output/01/other/88225572 | 2900 ++++++++ classification_output/01/other/88281850 | 281 + classification_output/01/other/92957605 | 418 ++ classification_output/01/other/95154278 | 155 + classification_output/01/other/9777608 | 281 - classification_output/01/other/9840852 | 1179 ---- classification_output/01/other/9937102 | 148 - classification_output/01/other/9948366 | 1321 ---- classification_output/01/other/99674399 | 148 + classification_output/01/semantic/0504199 | 595 -- classification_output/01/semantic/05479587 | 83 + classification_output/01/semantic/0891566 | 400 -- classification_output/01/semantic/12360755 | 296 + classification_output/01/semantic/1452608 | 78 - classification_output/01/semantic/2047990 | 999 --- classification_output/01/semantic/28596630 | 113 + classification_output/01/semantic/30680944 | 595 ++ classification_output/01/semantic/3242247 | 406 -- classification_output/01/semantic/3847403 | 83 - classification_output/01/semantic/46572227 | 406 ++ classification_output/01/semantic/53568181 | 78 + classification_output/01/semantic/7837801 | 113 - classification_output/01/semantic/80570214 | 400 ++ classification_output/01/semantic/8511484 | 296 - classification_output/01/semantic/96782458 | 999 +++ .../01/semantic/gitlab_semantic_addsubps | 28 + .../01/semantic/gitlab_semantic_adox | 41 + .../01/semantic/gitlab_semantic_bextr | 30 + .../01/semantic/gitlab_semantic_blsi | 25 + .../01/semantic/gitlab_semantic_blsmsk | 32 + .../01/semantic/gitlab_semantic_bzhi | 43 + classification_output/02/boot/42226390 | 188 + classification_output/02/boot/51610399 | 309 + classification_output/02/boot/60339453 | 62 + classification_output/02/boot/67821138 | 200 + classification_output/02/instruction/11357571 | 48 + classification_output/02/instruction/11933524 | 1126 +++ classification_output/02/instruction/24190340 | 2057 ++++++ classification_output/02/instruction/26095107 | 159 + classification_output/02/instruction/33802194 | 4940 +++++++++++++ classification_output/02/instruction/50773216 | 111 + classification_output/02/instruction/55961334 | 40 + classification_output/02/instruction/62179944 | 32 + classification_output/02/instruction/63565653 | 50 + classification_output/02/instruction/70868267 | 41 + classification_output/02/instruction/73660729 | 32 + classification_output/02/mistranslation/14887122 | 259 + classification_output/02/mistranslation/22219210 | 44 + classification_output/02/mistranslation/23270873 | 693 ++ classification_output/02/mistranslation/24930826 | 34 + classification_output/02/mistranslation/25842545 | 203 + classification_output/02/mistranslation/26430026 | 166 + classification_output/02/mistranslation/36568044 | 4582 ++++++++++++ classification_output/02/mistranslation/64322995 | 55 + classification_output/02/mistranslation/70294255 | 1062 +++ classification_output/02/mistranslation/71456293 | 1487 ++++ classification_output/02/mistranslation/74466963 | 1879 +++++ classification_output/02/mistranslation/74545755 | 345 + 
classification_output/02/mistranslation/80604314 | 1481 ++++ classification_output/02/mistranslation/80615920 | 349 + classification_output/02/other/02364653 | 364 + classification_output/02/other/02572177 | 422 ++ classification_output/02/other/04472277 | 577 ++ classification_output/02/other/12869209 | 89 + classification_output/02/other/13442371 | 370 + classification_output/02/other/14488057 | 712 ++ classification_output/02/other/16056596 | 99 + classification_output/02/other/16201167 | 101 + classification_output/02/other/16228234 | 1845 +++++ classification_output/02/other/17743720 | 772 ++ classification_output/02/other/21221931 | 329 + classification_output/02/other/21247035 | 1322 ++++ classification_output/02/other/23300761 | 314 + classification_output/02/other/23448582 | 266 + classification_output/02/other/25892827 | 1078 +++ classification_output/02/other/31349848 | 155 + classification_output/02/other/32484936 | 224 + classification_output/02/other/35170175 | 522 ++ classification_output/02/other/42613410 | 150 + classification_output/02/other/42974450 | 430 ++ classification_output/02/other/43643137 | 539 ++ classification_output/02/other/48245039 | 531 ++ classification_output/02/other/55247116 | 1311 ++++ classification_output/02/other/55367348 | 533 ++ classification_output/02/other/55753058 | 294 + classification_output/02/other/56309929 | 181 + classification_output/02/other/56937788 | 345 + classification_output/02/other/57195159 | 316 + classification_output/02/other/57231878 | 243 + classification_output/02/other/57756589 | 1422 ++++ classification_output/02/other/59540920 | 377 + classification_output/02/other/64571620 | 786 +++ classification_output/02/other/65781993 | 2794 ++++++++ classification_output/02/other/66743673 | 365 + classification_output/02/other/68897003 | 717 ++ classification_output/02/other/70021271 | 7449 +++++++++++++++++++ classification_output/02/other/70416488 | 1180 ++++ classification_output/02/other/74715356 | 127 + classification_output/02/other/79834768 | 410 ++ classification_output/02/other/81775929 | 236 + classification_output/02/other/85542195 | 121 + classification_output/02/other/88225572 | 2901 ++++++++ classification_output/02/other/88281850 | 282 + classification_output/02/other/92957605 | 419 ++ classification_output/02/other/95154278 | 156 + classification_output/02/other/99674399 | 149 + classification_output/02/semantic/05479587 | 84 + classification_output/02/semantic/12360755 | 297 + classification_output/02/semantic/28596630 | 114 + classification_output/02/semantic/30680944 | 596 ++ classification_output/02/semantic/46572227 | 407 ++ classification_output/02/semantic/53568181 | 79 + classification_output/02/semantic/80570214 | 401 ++ classification_output/02/semantic/96782458 | 1000 +++ .../02/semantic/gitlab_semantic_addsubps | 29 + .../02/semantic/gitlab_semantic_adox | 42 + .../02/semantic/gitlab_semantic_bextr | 31 + .../02/semantic/gitlab_semantic_blsi | 26 + .../02/semantic/gitlab_semantic_blsmsk | 33 + .../02/semantic/gitlab_semantic_bzhi | 44 + classification_output/03/KVM/04472277 | 579 ++ classification_output/03/KVM/26430026 | 168 + classification_output/03/KVM/33802194 | 4942 +++++++++++++ classification_output/03/KVM/42613410 | 152 + classification_output/03/KVM/43643137 | 541 ++ classification_output/03/KVM/55961334 | 42 + classification_output/03/KVM/71456293 | 1489 ++++ classification_output/03/KVM/80615920 | 351 + classification_output/03/boot/42226390 | 190 + classification_output/03/boot/51610399 | 
311 + classification_output/03/boot/60339453 | 64 + classification_output/03/boot/67821138 | 202 + classification_output/03/instruction/11357571 | 50 + classification_output/03/instruction/11933524 | 1128 +++ classification_output/03/instruction/24190340 | 2059 ++++++ classification_output/03/instruction/26095107 | 161 + classification_output/03/instruction/50773216 | 113 + classification_output/03/instruction/63565653 | 52 + classification_output/03/instruction/70868267 | 43 + classification_output/03/instruction/73660729 | 34 + classification_output/03/mistranslation/14887122 | 261 + classification_output/03/mistranslation/22219210 | 46 + classification_output/03/mistranslation/23270873 | 695 ++ classification_output/03/mistranslation/24930826 | 36 + classification_output/03/mistranslation/25842545 | 205 + classification_output/03/mistranslation/36568044 | 4584 ++++++++++++ classification_output/03/mistranslation/64322995 | 57 + classification_output/03/mistranslation/70294255 | 1064 +++ classification_output/03/mistranslation/74466963 | 1881 +++++ classification_output/03/mistranslation/74545755 | 347 + classification_output/03/mistranslation/80604314 | 1483 ++++ classification_output/03/network/05479587 | 86 + classification_output/03/network/62179944 | 34 + classification_output/03/other/02364653 | 366 + classification_output/03/other/02572177 | 424 ++ classification_output/03/other/12869209 | 91 + classification_output/03/other/13442371 | 372 + classification_output/03/other/14488057 | 714 ++ classification_output/03/other/16056596 | 101 + classification_output/03/other/16201167 | 103 + classification_output/03/other/16228234 | 1847 +++++ classification_output/03/other/17743720 | 774 ++ classification_output/03/other/21221931 | 331 + classification_output/03/other/21247035 | 1324 ++++ classification_output/03/other/23300761 | 316 + classification_output/03/other/23448582 | 268 + classification_output/03/other/25892827 | 1080 +++ classification_output/03/other/31349848 | 157 + classification_output/03/other/32484936 | 226 + classification_output/03/other/35170175 | 524 ++ classification_output/03/other/42974450 | 432 ++ classification_output/03/other/48245039 | 533 ++ classification_output/03/other/55247116 | 1313 ++++ classification_output/03/other/55367348 | 535 ++ classification_output/03/other/55753058 | 296 + classification_output/03/other/56309929 | 183 + classification_output/03/other/56937788 | 347 + classification_output/03/other/57195159 | 318 + classification_output/03/other/57231878 | 245 + classification_output/03/other/57756589 | 1424 ++++ classification_output/03/other/59540920 | 379 + classification_output/03/other/64571620 | 788 +++ classification_output/03/other/65781993 | 2796 ++++++++ classification_output/03/other/66743673 | 367 + classification_output/03/other/68897003 | 719 ++ classification_output/03/other/70021271 | 7451 +++++++++++++++++++ classification_output/03/other/70416488 | 1182 ++++ classification_output/03/other/74715356 | 129 + classification_output/03/other/79834768 | 412 ++ classification_output/03/other/81775929 | 238 + classification_output/03/other/85542195 | 123 + classification_output/03/other/88225572 | 2903 ++++++++ classification_output/03/other/88281850 | 284 + classification_output/03/other/92957605 | 421 ++ classification_output/03/other/95154278 | 158 + classification_output/03/other/99674399 | 151 + classification_output/03/semantic/12360755 | 299 + classification_output/03/semantic/28596630 | 116 + 
classification_output/03/semantic/30680944 | 598 ++ classification_output/03/semantic/46572227 | 409 ++ classification_output/03/semantic/53568181 | 81 + classification_output/03/semantic/80570214 | 403 ++ classification_output/03/semantic/96782458 | 1002 +++ .../03/semantic/gitlab_semantic_addsubps | 31 + .../03/semantic/gitlab_semantic_adox | 44 + .../03/semantic/gitlab_semantic_bextr | 33 + .../03/semantic/gitlab_semantic_blsi | 28 + .../03/semantic/gitlab_semantic_blsmsk | 35 + .../03/semantic/gitlab_semantic_bzhi | 46 + classification_output/04/KVM/04472277 | 584 ++ classification_output/04/KVM/26430026 | 173 + classification_output/04/KVM/43643137 | 546 ++ classification_output/04/KVM/71456293 | 1494 ++++ classification_output/04/KVM/80615920 | 356 + classification_output/04/assembly/48245039 | 538 ++ classification_output/04/boot/51610399 | 316 + classification_output/04/boot/60339453 | 69 + classification_output/04/device/14488057 | 719 ++ classification_output/04/device/24190340 | 2064 ++++++ classification_output/04/device/24930826 | 41 + classification_output/04/device/28596630 | 121 + classification_output/04/device/42226390 | 195 + classification_output/04/device/57195159 | 323 + classification_output/04/device/57231878 | 250 + classification_output/04/device/67821138 | 207 + classification_output/04/device/99674399 | 156 + classification_output/04/graphic/22219210 | 51 + classification_output/04/graphic/30680944 | 603 ++ classification_output/04/graphic/55961334 | 47 + classification_output/04/graphic/73660729 | 39 + classification_output/04/instruction/11933524 | 1133 +++ classification_output/04/instruction/26095107 | 166 + classification_output/04/instruction/50773216 | 118 + classification_output/04/instruction/63565653 | 57 + classification_output/04/instruction/70868267 | 48 + classification_output/04/mistranslation/14887122 | 266 + classification_output/04/mistranslation/23270873 | 700 ++ classification_output/04/mistranslation/25842545 | 210 + classification_output/04/mistranslation/36568044 | 4589 ++++++++++++ classification_output/04/mistranslation/64322995 | 62 + classification_output/04/mistranslation/70294255 | 1069 +++ classification_output/04/mistranslation/74466963 | 1886 +++++ classification_output/04/mistranslation/74545755 | 352 + classification_output/04/mistranslation/80604314 | 1488 ++++ classification_output/04/network/05479587 | 91 + classification_output/04/network/62179944 | 39 + classification_output/04/other/02364653 | 371 + classification_output/04/other/02572177 | 429 ++ classification_output/04/other/12869209 | 96 + classification_output/04/other/13442371 | 377 + classification_output/04/other/16056596 | 106 + classification_output/04/other/16201167 | 108 + classification_output/04/other/16228234 | 1852 +++++ classification_output/04/other/17743720 | 779 ++ classification_output/04/other/21221931 | 336 + classification_output/04/other/21247035 | 1329 ++++ classification_output/04/other/23300761 | 321 + classification_output/04/other/23448582 | 273 + classification_output/04/other/25892827 | 1085 +++ classification_output/04/other/31349848 | 162 + classification_output/04/other/32484936 | 231 + classification_output/04/other/35170175 | 529 ++ classification_output/04/other/42974450 | 437 ++ classification_output/04/other/55247116 | 1318 ++++ classification_output/04/other/55367348 | 540 ++ classification_output/04/other/55753058 | 301 + classification_output/04/other/56309929 | 188 + classification_output/04/other/56937788 | 352 + 
classification_output/04/other/57756589 | 1429 ++++ classification_output/04/other/59540920 | 384 + classification_output/04/other/64571620 | 793 +++ classification_output/04/other/65781993 | 2801 ++++++++ classification_output/04/other/66743673 | 372 + classification_output/04/other/68897003 | 724 ++ classification_output/04/other/70021271 | 7456 ++++++++++++++++++++ classification_output/04/other/70416488 | 1187 ++++ classification_output/04/other/74715356 | 134 + classification_output/04/other/79834768 | 417 ++ classification_output/04/other/81775929 | 243 + classification_output/04/other/85542195 | 128 + classification_output/04/other/88225572 | 2908 ++++++++ classification_output/04/other/88281850 | 289 + classification_output/04/other/92957605 | 426 ++ classification_output/04/other/95154278 | 163 + classification_output/04/semantic/12360755 | 304 + classification_output/04/semantic/46572227 | 414 ++ classification_output/04/semantic/53568181 | 86 + classification_output/04/semantic/96782458 | 1007 +++ .../04/semantic/gitlab_semantic_addsubps | 36 + .../04/semantic/gitlab_semantic_adox | 49 + .../04/semantic/gitlab_semantic_bextr | 38 + .../04/semantic/gitlab_semantic_blsi | 33 + .../04/semantic/gitlab_semantic_blsmsk | 40 + .../04/semantic/gitlab_semantic_bzhi | 51 + classification_output/04/vnc/11357571 | 55 + classification_output/04/vnc/33802194 | 4947 +++++++++++++ classification_output/04/vnc/42613410 | 157 + classification_output/04/vnc/80570214 | 408 ++ classification_output/05/KVM/04472277 | 584 ++ classification_output/05/KVM/26430026 | 173 + classification_output/05/KVM/43643137 | 546 ++ classification_output/05/KVM/71456293 | 1494 ++++ classification_output/05/KVM/80615920 | 356 + classification_output/05/boot/51610399 | 316 + classification_output/05/boot/60339453 | 69 + classification_output/05/device/14488057 | 719 ++ classification_output/05/device/24190340 | 2064 ++++++ classification_output/05/device/24930826 | 41 + classification_output/05/device/26095107 | 166 + classification_output/05/device/28596630 | 121 + classification_output/05/device/36568044 | 4589 ++++++++++++ classification_output/05/device/42226390 | 195 + classification_output/05/device/48245039 | 538 ++ classification_output/05/device/57195159 | 323 + classification_output/05/device/57231878 | 250 + classification_output/05/device/67821138 | 207 + classification_output/05/device/99674399 | 156 + classification_output/05/graphic/22219210 | 51 + classification_output/05/graphic/30680944 | 603 ++ classification_output/05/graphic/46572227 | 414 ++ classification_output/05/graphic/53568181 | 86 + classification_output/05/graphic/55961334 | 47 + classification_output/05/graphic/73660729 | 39 + classification_output/05/instruction/11933524 | 1133 +++ classification_output/05/instruction/50773216 | 118 + classification_output/05/instruction/63565653 | 57 + classification_output/05/instruction/70868267 | 48 + classification_output/05/mistranslation/14887122 | 266 + classification_output/05/mistranslation/23270873 | 700 ++ classification_output/05/mistranslation/25842545 | 210 + classification_output/05/mistranslation/64322995 | 62 + classification_output/05/mistranslation/70294255 | 1069 +++ classification_output/05/mistranslation/74466963 | 1886 +++++ classification_output/05/mistranslation/74545755 | 352 + classification_output/05/mistranslation/80604314 | 1488 ++++ classification_output/05/network/05479587 | 91 + classification_output/05/network/62179944 | 39 + classification_output/05/other/02364653 | 371 
+ classification_output/05/other/02572177 | 429 ++ classification_output/05/other/12869209 | 96 + classification_output/05/other/13442371 | 377 + classification_output/05/other/16056596 | 106 + classification_output/05/other/16201167 | 108 + classification_output/05/other/16228234 | 1852 +++++ classification_output/05/other/17743720 | 779 ++ classification_output/05/other/21221931 | 336 + classification_output/05/other/21247035 | 1329 ++++ classification_output/05/other/23300761 | 321 + classification_output/05/other/23448582 | 273 + classification_output/05/other/25892827 | 1085 +++ classification_output/05/other/31349848 | 162 + classification_output/05/other/32484936 | 231 + classification_output/05/other/35170175 | 529 ++ classification_output/05/other/42974450 | 437 ++ classification_output/05/other/55247116 | 1318 ++++ classification_output/05/other/55367348 | 540 ++ classification_output/05/other/55753058 | 301 + classification_output/05/other/56309929 | 188 + classification_output/05/other/56937788 | 352 + classification_output/05/other/57756589 | 1429 ++++ classification_output/05/other/59540920 | 384 + classification_output/05/other/64571620 | 793 +++ classification_output/05/other/65781993 | 2801 ++++++++ classification_output/05/other/66743673 | 372 + classification_output/05/other/68897003 | 724 ++ classification_output/05/other/70021271 | 7456 ++++++++++++++++++++ classification_output/05/other/70416488 | 1187 ++++ classification_output/05/other/74715356 | 134 + classification_output/05/other/79834768 | 417 ++ classification_output/05/other/81775929 | 243 + classification_output/05/other/85542195 | 128 + classification_output/05/other/88225572 | 2908 ++++++++ classification_output/05/other/88281850 | 289 + classification_output/05/other/92957605 | 426 ++ classification_output/05/other/95154278 | 163 + classification_output/05/other/96782458 | 1007 +++ classification_output/05/semantic/12360755 | 304 + .../05/semantic/gitlab_semantic_addsubps | 36 + .../05/semantic/gitlab_semantic_adox | 49 + .../05/semantic/gitlab_semantic_bextr | 38 + .../05/semantic/gitlab_semantic_blsi | 33 + .../05/semantic/gitlab_semantic_blsmsk | 40 + .../05/semantic/gitlab_semantic_bzhi | 51 + classification_output/05/vnc/11357571 | 55 + classification_output/05/vnc/33802194 | 4947 +++++++++++++ classification_output/05/vnc/42613410 | 157 + classification_output/05/vnc/80570214 | 408 ++ 529 files changed, 299045 insertions(+), 59258 deletions(-) delete mode 100644 classification_output/01/README.md delete mode 100644 classification_output/01/instruction/0966902 create mode 100644 classification_output/01/instruction/11357571 create mode 100644 classification_output/01/instruction/11933524 create mode 100644 classification_output/01/instruction/24190340 create mode 100644 classification_output/01/instruction/26095107 delete mode 100644 classification_output/01/instruction/2609717 delete mode 100644 classification_output/01/instruction/2880487 create mode 100644 classification_output/01/instruction/33802194 delete mode 100644 classification_output/01/instruction/3457423 create mode 100644 classification_output/01/instruction/42226390 create mode 100644 classification_output/01/instruction/50773216 create mode 100644 classification_output/01/instruction/51610399 create mode 100644 classification_output/01/instruction/55961334 delete mode 100644 classification_output/01/instruction/5843372 delete mode 100644 classification_output/01/instruction/6117378 create mode 100644 
classification_output/01/instruction/62179944 create mode 100644 classification_output/01/instruction/63565653 create mode 100644 classification_output/01/instruction/70868267 create mode 100644 classification_output/01/instruction/73660729 delete mode 100644 classification_output/01/instruction/7647456 delete mode 100644 classification_output/01/instruction/7658242 delete mode 100644 classification_output/01/instruction/7733130 delete mode 100644 classification_output/01/instruction/7960594 delete mode 100644 classification_output/01/instruction/8019995 delete mode 100644 classification_output/01/instruction/8566429 delete mode 100644 classification_output/01/instruction/9818783 delete mode 100644 classification_output/01/mistranslation/0247400 delete mode 100644 classification_output/01/mistranslation/1267916 create mode 100644 classification_output/01/mistranslation/14887122 delete mode 100644 classification_output/01/mistranslation/1693040 create mode 100644 classification_output/01/mistranslation/22219210 create mode 100644 classification_output/01/mistranslation/23270873 create mode 100644 classification_output/01/mistranslation/24930826 create mode 100644 classification_output/01/mistranslation/25842545 create mode 100644 classification_output/01/mistranslation/26430026 create mode 100644 classification_output/01/mistranslation/36568044 delete mode 100644 classification_output/01/mistranslation/3886413 delete mode 100644 classification_output/01/mistranslation/4158985 delete mode 100644 classification_output/01/mistranslation/4412535 delete mode 100644 classification_output/01/mistranslation/5373318 delete mode 100644 classification_output/01/mistranslation/5798945 delete mode 100644 classification_output/01/mistranslation/5933279 delete mode 100644 classification_output/01/mistranslation/6178292 create mode 100644 classification_output/01/mistranslation/64322995 delete mode 100644 classification_output/01/mistranslation/6866700 create mode 100644 classification_output/01/mistranslation/70294255 create mode 100644 classification_output/01/mistranslation/71456293 create mode 100644 classification_output/01/mistranslation/74466963 create mode 100644 classification_output/01/mistranslation/74545755 delete mode 100644 classification_output/01/mistranslation/7711787 create mode 100644 classification_output/01/mistranslation/80604314 create mode 100644 classification_output/01/mistranslation/80615920 delete mode 100644 classification_output/01/mistranslation/8720260 delete mode 100644 classification_output/01/mistranslation/8874178 delete mode 100644 classification_output/01/other/0001467 create mode 100644 classification_output/01/other/02364653 create mode 100644 classification_output/01/other/02572177 create mode 100644 classification_output/01/other/04472277 delete mode 100644 classification_output/01/other/0804350 delete mode 100644 classification_output/01/other/1067127 delete mode 100644 classification_output/01/other/1195866 create mode 100644 classification_output/01/other/12869209 create mode 100644 classification_output/01/other/13442371 delete mode 100644 classification_output/01/other/1398669 delete mode 100644 classification_output/01/other/1412913 create mode 100644 classification_output/01/other/14488057 create mode 100644 classification_output/01/other/16056596 create mode 100644 classification_output/01/other/16201167 create mode 100644 classification_output/01/other/16228234 create mode 100644 classification_output/01/other/17743720 create mode 100644 
classification_output/01/other/21221931 create mode 100644 classification_output/01/other/21247035 delete mode 100644 classification_output/01/other/2308923 create mode 100644 classification_output/01/other/23300761 create mode 100644 classification_output/01/other/23448582 delete mode 100644 classification_output/01/other/2393649 delete mode 100644 classification_output/01/other/2409210 delete mode 100644 classification_output/01/other/2537817 delete mode 100644 classification_output/01/other/2562302 create mode 100644 classification_output/01/other/25892827 create mode 100644 classification_output/01/other/31349848 delete mode 100644 classification_output/01/other/3223447 create mode 100644 classification_output/01/other/32484936 delete mode 100644 classification_output/01/other/3501174 create mode 100644 classification_output/01/other/35170175 delete mode 100644 classification_output/01/other/3749377 delete mode 100644 classification_output/01/other/3825088 create mode 100644 classification_output/01/other/42613410 create mode 100644 classification_output/01/other/42974450 delete mode 100644 classification_output/01/other/4314117 create mode 100644 classification_output/01/other/43643137 delete mode 100644 classification_output/01/other/4774720 delete mode 100644 classification_output/01/other/4800759 create mode 100644 classification_output/01/other/48245039 delete mode 100644 classification_output/01/other/4938208 delete mode 100644 classification_output/01/other/4970412 delete mode 100644 classification_output/01/other/5057521 delete mode 100644 classification_output/01/other/5215275 delete mode 100644 classification_output/01/other/5321072 delete mode 100644 classification_output/01/other/5362491 delete mode 100644 classification_output/01/other/5396868 delete mode 100644 classification_output/01/other/5443005 create mode 100644 classification_output/01/other/55247116 create mode 100644 classification_output/01/other/55367348 create mode 100644 classification_output/01/other/55753058 create mode 100644 classification_output/01/other/56309929 create mode 100644 classification_output/01/other/56937788 create mode 100644 classification_output/01/other/57195159 create mode 100644 classification_output/01/other/57231878 delete mode 100644 classification_output/01/other/5745618 create mode 100644 classification_output/01/other/57756589 delete mode 100644 classification_output/01/other/5912779 create mode 100644 classification_output/01/other/59540920 create mode 100644 classification_output/01/other/60339453 delete mode 100644 classification_output/01/other/6156219 delete mode 100644 classification_output/01/other/6257722 delete mode 100644 classification_output/01/other/6355518 delete mode 100644 classification_output/01/other/6416205 create mode 100644 classification_output/01/other/64571620 delete mode 100644 classification_output/01/other/6531392 create mode 100644 classification_output/01/other/65781993 create mode 100644 classification_output/01/other/66743673 delete mode 100644 classification_output/01/other/6739993 create mode 100644 classification_output/01/other/67821138 create mode 100644 classification_output/01/other/68897003 delete mode 100644 classification_output/01/other/6983580 delete mode 100644 classification_output/01/other/6998781 create mode 100644 classification_output/01/other/70021271 create mode 100644 classification_output/01/other/70416488 delete mode 100644 classification_output/01/other/7143139 delete mode 100644 classification_output/01/other/7427991 create 
mode 100644 classification_output/01/other/74715356 delete mode 100644 classification_output/01/other/7639274 create mode 100644 classification_output/01/other/79834768 delete mode 100644 classification_output/01/other/8109943 create mode 100644 classification_output/01/other/81775929 create mode 100644 classification_output/01/other/85542195 delete mode 100644 classification_output/01/other/8621822 delete mode 100644 classification_output/01/other/8627146 delete mode 100644 classification_output/01/other/8653736 delete mode 100644 classification_output/01/other/8691137 create mode 100644 classification_output/01/other/88225572 create mode 100644 classification_output/01/other/88281850 create mode 100644 classification_output/01/other/92957605 create mode 100644 classification_output/01/other/95154278 delete mode 100644 classification_output/01/other/9777608 delete mode 100644 classification_output/01/other/9840852 delete mode 100644 classification_output/01/other/9937102 delete mode 100644 classification_output/01/other/9948366 create mode 100644 classification_output/01/other/99674399 delete mode 100644 classification_output/01/semantic/0504199 create mode 100644 classification_output/01/semantic/05479587 delete mode 100644 classification_output/01/semantic/0891566 create mode 100644 classification_output/01/semantic/12360755 delete mode 100644 classification_output/01/semantic/1452608 delete mode 100644 classification_output/01/semantic/2047990 create mode 100644 classification_output/01/semantic/28596630 create mode 100644 classification_output/01/semantic/30680944 delete mode 100644 classification_output/01/semantic/3242247 delete mode 100644 classification_output/01/semantic/3847403 create mode 100644 classification_output/01/semantic/46572227 create mode 100644 classification_output/01/semantic/53568181 delete mode 100644 classification_output/01/semantic/7837801 create mode 100644 classification_output/01/semantic/80570214 delete mode 100644 classification_output/01/semantic/8511484 create mode 100644 classification_output/01/semantic/96782458 create mode 100644 classification_output/01/semantic/gitlab_semantic_addsubps create mode 100644 classification_output/01/semantic/gitlab_semantic_adox create mode 100644 classification_output/01/semantic/gitlab_semantic_bextr create mode 100644 classification_output/01/semantic/gitlab_semantic_blsi create mode 100644 classification_output/01/semantic/gitlab_semantic_blsmsk create mode 100644 classification_output/01/semantic/gitlab_semantic_bzhi create mode 100644 classification_output/02/boot/42226390 create mode 100644 classification_output/02/boot/51610399 create mode 100644 classification_output/02/boot/60339453 create mode 100644 classification_output/02/boot/67821138 create mode 100644 classification_output/02/instruction/11357571 create mode 100644 classification_output/02/instruction/11933524 create mode 100644 classification_output/02/instruction/24190340 create mode 100644 classification_output/02/instruction/26095107 create mode 100644 classification_output/02/instruction/33802194 create mode 100644 classification_output/02/instruction/50773216 create mode 100644 classification_output/02/instruction/55961334 create mode 100644 classification_output/02/instruction/62179944 create mode 100644 classification_output/02/instruction/63565653 create mode 100644 classification_output/02/instruction/70868267 create mode 100644 classification_output/02/instruction/73660729 create mode 100644 classification_output/02/mistranslation/14887122 
create mode 100644 classification_output/02/mistranslation/22219210 create mode 100644 classification_output/02/mistranslation/23270873 create mode 100644 classification_output/02/mistranslation/24930826 create mode 100644 classification_output/02/mistranslation/25842545 create mode 100644 classification_output/02/mistranslation/26430026 create mode 100644 classification_output/02/mistranslation/36568044 create mode 100644 classification_output/02/mistranslation/64322995 create mode 100644 classification_output/02/mistranslation/70294255 create mode 100644 classification_output/02/mistranslation/71456293 create mode 100644 classification_output/02/mistranslation/74466963 create mode 100644 classification_output/02/mistranslation/74545755 create mode 100644 classification_output/02/mistranslation/80604314 create mode 100644 classification_output/02/mistranslation/80615920 create mode 100644 classification_output/02/other/02364653 create mode 100644 classification_output/02/other/02572177 create mode 100644 classification_output/02/other/04472277 create mode 100644 classification_output/02/other/12869209 create mode 100644 classification_output/02/other/13442371 create mode 100644 classification_output/02/other/14488057 create mode 100644 classification_output/02/other/16056596 create mode 100644 classification_output/02/other/16201167 create mode 100644 classification_output/02/other/16228234 create mode 100644 classification_output/02/other/17743720 create mode 100644 classification_output/02/other/21221931 create mode 100644 classification_output/02/other/21247035 create mode 100644 classification_output/02/other/23300761 create mode 100644 classification_output/02/other/23448582 create mode 100644 classification_output/02/other/25892827 create mode 100644 classification_output/02/other/31349848 create mode 100644 classification_output/02/other/32484936 create mode 100644 classification_output/02/other/35170175 create mode 100644 classification_output/02/other/42613410 create mode 100644 classification_output/02/other/42974450 create mode 100644 classification_output/02/other/43643137 create mode 100644 classification_output/02/other/48245039 create mode 100644 classification_output/02/other/55247116 create mode 100644 classification_output/02/other/55367348 create mode 100644 classification_output/02/other/55753058 create mode 100644 classification_output/02/other/56309929 create mode 100644 classification_output/02/other/56937788 create mode 100644 classification_output/02/other/57195159 create mode 100644 classification_output/02/other/57231878 create mode 100644 classification_output/02/other/57756589 create mode 100644 classification_output/02/other/59540920 create mode 100644 classification_output/02/other/64571620 create mode 100644 classification_output/02/other/65781993 create mode 100644 classification_output/02/other/66743673 create mode 100644 classification_output/02/other/68897003 create mode 100644 classification_output/02/other/70021271 create mode 100644 classification_output/02/other/70416488 create mode 100644 classification_output/02/other/74715356 create mode 100644 classification_output/02/other/79834768 create mode 100644 classification_output/02/other/81775929 create mode 100644 classification_output/02/other/85542195 create mode 100644 classification_output/02/other/88225572 create mode 100644 classification_output/02/other/88281850 create mode 100644 classification_output/02/other/92957605 create mode 100644 classification_output/02/other/95154278 create mode 
100644 classification_output/02/other/99674399 create mode 100644 classification_output/02/semantic/05479587 create mode 100644 classification_output/02/semantic/12360755 create mode 100644 classification_output/02/semantic/28596630 create mode 100644 classification_output/02/semantic/30680944 create mode 100644 classification_output/02/semantic/46572227 create mode 100644 classification_output/02/semantic/53568181 create mode 100644 classification_output/02/semantic/80570214 create mode 100644 classification_output/02/semantic/96782458 create mode 100644 classification_output/02/semantic/gitlab_semantic_addsubps create mode 100644 classification_output/02/semantic/gitlab_semantic_adox create mode 100644 classification_output/02/semantic/gitlab_semantic_bextr create mode 100644 classification_output/02/semantic/gitlab_semantic_blsi create mode 100644 classification_output/02/semantic/gitlab_semantic_blsmsk create mode 100644 classification_output/02/semantic/gitlab_semantic_bzhi create mode 100644 classification_output/03/KVM/04472277 create mode 100644 classification_output/03/KVM/26430026 create mode 100644 classification_output/03/KVM/33802194 create mode 100644 classification_output/03/KVM/42613410 create mode 100644 classification_output/03/KVM/43643137 create mode 100644 classification_output/03/KVM/55961334 create mode 100644 classification_output/03/KVM/71456293 create mode 100644 classification_output/03/KVM/80615920 create mode 100644 classification_output/03/boot/42226390 create mode 100644 classification_output/03/boot/51610399 create mode 100644 classification_output/03/boot/60339453 create mode 100644 classification_output/03/boot/67821138 create mode 100644 classification_output/03/instruction/11357571 create mode 100644 classification_output/03/instruction/11933524 create mode 100644 classification_output/03/instruction/24190340 create mode 100644 classification_output/03/instruction/26095107 create mode 100644 classification_output/03/instruction/50773216 create mode 100644 classification_output/03/instruction/63565653 create mode 100644 classification_output/03/instruction/70868267 create mode 100644 classification_output/03/instruction/73660729 create mode 100644 classification_output/03/mistranslation/14887122 create mode 100644 classification_output/03/mistranslation/22219210 create mode 100644 classification_output/03/mistranslation/23270873 create mode 100644 classification_output/03/mistranslation/24930826 create mode 100644 classification_output/03/mistranslation/25842545 create mode 100644 classification_output/03/mistranslation/36568044 create mode 100644 classification_output/03/mistranslation/64322995 create mode 100644 classification_output/03/mistranslation/70294255 create mode 100644 classification_output/03/mistranslation/74466963 create mode 100644 classification_output/03/mistranslation/74545755 create mode 100644 classification_output/03/mistranslation/80604314 create mode 100644 classification_output/03/network/05479587 create mode 100644 classification_output/03/network/62179944 create mode 100644 classification_output/03/other/02364653 create mode 100644 classification_output/03/other/02572177 create mode 100644 classification_output/03/other/12869209 create mode 100644 classification_output/03/other/13442371 create mode 100644 classification_output/03/other/14488057 create mode 100644 classification_output/03/other/16056596 create mode 100644 classification_output/03/other/16201167 create mode 100644 classification_output/03/other/16228234 create 
mode 100644 classification_output/03/other/17743720 create mode 100644 classification_output/03/other/21221931 create mode 100644 classification_output/03/other/21247035 create mode 100644 classification_output/03/other/23300761 create mode 100644 classification_output/03/other/23448582 create mode 100644 classification_output/03/other/25892827 create mode 100644 classification_output/03/other/31349848 create mode 100644 classification_output/03/other/32484936 create mode 100644 classification_output/03/other/35170175 create mode 100644 classification_output/03/other/42974450 create mode 100644 classification_output/03/other/48245039 create mode 100644 classification_output/03/other/55247116 create mode 100644 classification_output/03/other/55367348 create mode 100644 classification_output/03/other/55753058 create mode 100644 classification_output/03/other/56309929 create mode 100644 classification_output/03/other/56937788 create mode 100644 classification_output/03/other/57195159 create mode 100644 classification_output/03/other/57231878 create mode 100644 classification_output/03/other/57756589 create mode 100644 classification_output/03/other/59540920 create mode 100644 classification_output/03/other/64571620 create mode 100644 classification_output/03/other/65781993 create mode 100644 classification_output/03/other/66743673 create mode 100644 classification_output/03/other/68897003 create mode 100644 classification_output/03/other/70021271 create mode 100644 classification_output/03/other/70416488 create mode 100644 classification_output/03/other/74715356 create mode 100644 classification_output/03/other/79834768 create mode 100644 classification_output/03/other/81775929 create mode 100644 classification_output/03/other/85542195 create mode 100644 classification_output/03/other/88225572 create mode 100644 classification_output/03/other/88281850 create mode 100644 classification_output/03/other/92957605 create mode 100644 classification_output/03/other/95154278 create mode 100644 classification_output/03/other/99674399 create mode 100644 classification_output/03/semantic/12360755 create mode 100644 classification_output/03/semantic/28596630 create mode 100644 classification_output/03/semantic/30680944 create mode 100644 classification_output/03/semantic/46572227 create mode 100644 classification_output/03/semantic/53568181 create mode 100644 classification_output/03/semantic/80570214 create mode 100644 classification_output/03/semantic/96782458 create mode 100644 classification_output/03/semantic/gitlab_semantic_addsubps create mode 100644 classification_output/03/semantic/gitlab_semantic_adox create mode 100644 classification_output/03/semantic/gitlab_semantic_bextr create mode 100644 classification_output/03/semantic/gitlab_semantic_blsi create mode 100644 classification_output/03/semantic/gitlab_semantic_blsmsk create mode 100644 classification_output/03/semantic/gitlab_semantic_bzhi create mode 100644 classification_output/04/KVM/04472277 create mode 100644 classification_output/04/KVM/26430026 create mode 100644 classification_output/04/KVM/43643137 create mode 100644 classification_output/04/KVM/71456293 create mode 100644 classification_output/04/KVM/80615920 create mode 100644 classification_output/04/assembly/48245039 create mode 100644 classification_output/04/boot/51610399 create mode 100644 classification_output/04/boot/60339453 create mode 100644 classification_output/04/device/14488057 create mode 100644 classification_output/04/device/24190340 create mode 100644 
classification_output/04/device/24930826 create mode 100644 classification_output/04/device/28596630 create mode 100644 classification_output/04/device/42226390 create mode 100644 classification_output/04/device/57195159 create mode 100644 classification_output/04/device/57231878 create mode 100644 classification_output/04/device/67821138 create mode 100644 classification_output/04/device/99674399 create mode 100644 classification_output/04/graphic/22219210 create mode 100644 classification_output/04/graphic/30680944 create mode 100644 classification_output/04/graphic/55961334 create mode 100644 classification_output/04/graphic/73660729 create mode 100644 classification_output/04/instruction/11933524 create mode 100644 classification_output/04/instruction/26095107 create mode 100644 classification_output/04/instruction/50773216 create mode 100644 classification_output/04/instruction/63565653 create mode 100644 classification_output/04/instruction/70868267 create mode 100644 classification_output/04/mistranslation/14887122 create mode 100644 classification_output/04/mistranslation/23270873 create mode 100644 classification_output/04/mistranslation/25842545 create mode 100644 classification_output/04/mistranslation/36568044 create mode 100644 classification_output/04/mistranslation/64322995 create mode 100644 classification_output/04/mistranslation/70294255 create mode 100644 classification_output/04/mistranslation/74466963 create mode 100644 classification_output/04/mistranslation/74545755 create mode 100644 classification_output/04/mistranslation/80604314 create mode 100644 classification_output/04/network/05479587 create mode 100644 classification_output/04/network/62179944 create mode 100644 classification_output/04/other/02364653 create mode 100644 classification_output/04/other/02572177 create mode 100644 classification_output/04/other/12869209 create mode 100644 classification_output/04/other/13442371 create mode 100644 classification_output/04/other/16056596 create mode 100644 classification_output/04/other/16201167 create mode 100644 classification_output/04/other/16228234 create mode 100644 classification_output/04/other/17743720 create mode 100644 classification_output/04/other/21221931 create mode 100644 classification_output/04/other/21247035 create mode 100644 classification_output/04/other/23300761 create mode 100644 classification_output/04/other/23448582 create mode 100644 classification_output/04/other/25892827 create mode 100644 classification_output/04/other/31349848 create mode 100644 classification_output/04/other/32484936 create mode 100644 classification_output/04/other/35170175 create mode 100644 classification_output/04/other/42974450 create mode 100644 classification_output/04/other/55247116 create mode 100644 classification_output/04/other/55367348 create mode 100644 classification_output/04/other/55753058 create mode 100644 classification_output/04/other/56309929 create mode 100644 classification_output/04/other/56937788 create mode 100644 classification_output/04/other/57756589 create mode 100644 classification_output/04/other/59540920 create mode 100644 classification_output/04/other/64571620 create mode 100644 classification_output/04/other/65781993 create mode 100644 classification_output/04/other/66743673 create mode 100644 classification_output/04/other/68897003 create mode 100644 classification_output/04/other/70021271 create mode 100644 classification_output/04/other/70416488 create mode 100644 classification_output/04/other/74715356 create mode 100644 
classification_output/04/other/79834768 create mode 100644 classification_output/04/other/81775929 create mode 100644 classification_output/04/other/85542195 create mode 100644 classification_output/04/other/88225572 create mode 100644 classification_output/04/other/88281850 create mode 100644 classification_output/04/other/92957605 create mode 100644 classification_output/04/other/95154278 create mode 100644 classification_output/04/semantic/12360755 create mode 100644 classification_output/04/semantic/46572227 create mode 100644 classification_output/04/semantic/53568181 create mode 100644 classification_output/04/semantic/96782458 create mode 100644 classification_output/04/semantic/gitlab_semantic_addsubps create mode 100644 classification_output/04/semantic/gitlab_semantic_adox create mode 100644 classification_output/04/semantic/gitlab_semantic_bextr create mode 100644 classification_output/04/semantic/gitlab_semantic_blsi create mode 100644 classification_output/04/semantic/gitlab_semantic_blsmsk create mode 100644 classification_output/04/semantic/gitlab_semantic_bzhi create mode 100644 classification_output/04/vnc/11357571 create mode 100644 classification_output/04/vnc/33802194 create mode 100644 classification_output/04/vnc/42613410 create mode 100644 classification_output/04/vnc/80570214 create mode 100644 classification_output/05/KVM/04472277 create mode 100644 classification_output/05/KVM/26430026 create mode 100644 classification_output/05/KVM/43643137 create mode 100644 classification_output/05/KVM/71456293 create mode 100644 classification_output/05/KVM/80615920 create mode 100644 classification_output/05/boot/51610399 create mode 100644 classification_output/05/boot/60339453 create mode 100644 classification_output/05/device/14488057 create mode 100644 classification_output/05/device/24190340 create mode 100644 classification_output/05/device/24930826 create mode 100644 classification_output/05/device/26095107 create mode 100644 classification_output/05/device/28596630 create mode 100644 classification_output/05/device/36568044 create mode 100644 classification_output/05/device/42226390 create mode 100644 classification_output/05/device/48245039 create mode 100644 classification_output/05/device/57195159 create mode 100644 classification_output/05/device/57231878 create mode 100644 classification_output/05/device/67821138 create mode 100644 classification_output/05/device/99674399 create mode 100644 classification_output/05/graphic/22219210 create mode 100644 classification_output/05/graphic/30680944 create mode 100644 classification_output/05/graphic/46572227 create mode 100644 classification_output/05/graphic/53568181 create mode 100644 classification_output/05/graphic/55961334 create mode 100644 classification_output/05/graphic/73660729 create mode 100644 classification_output/05/instruction/11933524 create mode 100644 classification_output/05/instruction/50773216 create mode 100644 classification_output/05/instruction/63565653 create mode 100644 classification_output/05/instruction/70868267 create mode 100644 classification_output/05/mistranslation/14887122 create mode 100644 classification_output/05/mistranslation/23270873 create mode 100644 classification_output/05/mistranslation/25842545 create mode 100644 classification_output/05/mistranslation/64322995 create mode 100644 classification_output/05/mistranslation/70294255 create mode 100644 classification_output/05/mistranslation/74466963 create mode 100644 classification_output/05/mistranslation/74545755 create 
mode 100644 classification_output/05/mistranslation/80604314 create mode 100644 classification_output/05/network/05479587 create mode 100644 classification_output/05/network/62179944 create mode 100644 classification_output/05/other/02364653 create mode 100644 classification_output/05/other/02572177 create mode 100644 classification_output/05/other/12869209 create mode 100644 classification_output/05/other/13442371 create mode 100644 classification_output/05/other/16056596 create mode 100644 classification_output/05/other/16201167 create mode 100644 classification_output/05/other/16228234 create mode 100644 classification_output/05/other/17743720 create mode 100644 classification_output/05/other/21221931 create mode 100644 classification_output/05/other/21247035 create mode 100644 classification_output/05/other/23300761 create mode 100644 classification_output/05/other/23448582 create mode 100644 classification_output/05/other/25892827 create mode 100644 classification_output/05/other/31349848 create mode 100644 classification_output/05/other/32484936 create mode 100644 classification_output/05/other/35170175 create mode 100644 classification_output/05/other/42974450 create mode 100644 classification_output/05/other/55247116 create mode 100644 classification_output/05/other/55367348 create mode 100644 classification_output/05/other/55753058 create mode 100644 classification_output/05/other/56309929 create mode 100644 classification_output/05/other/56937788 create mode 100644 classification_output/05/other/57756589 create mode 100644 classification_output/05/other/59540920 create mode 100644 classification_output/05/other/64571620 create mode 100644 classification_output/05/other/65781993 create mode 100644 classification_output/05/other/66743673 create mode 100644 classification_output/05/other/68897003 create mode 100644 classification_output/05/other/70021271 create mode 100644 classification_output/05/other/70416488 create mode 100644 classification_output/05/other/74715356 create mode 100644 classification_output/05/other/79834768 create mode 100644 classification_output/05/other/81775929 create mode 100644 classification_output/05/other/85542195 create mode 100644 classification_output/05/other/88225572 create mode 100644 classification_output/05/other/88281850 create mode 100644 classification_output/05/other/92957605 create mode 100644 classification_output/05/other/95154278 create mode 100644 classification_output/05/other/96782458 create mode 100644 classification_output/05/semantic/12360755 create mode 100644 classification_output/05/semantic/gitlab_semantic_addsubps create mode 100644 classification_output/05/semantic/gitlab_semantic_adox create mode 100644 classification_output/05/semantic/gitlab_semantic_bextr create mode 100644 classification_output/05/semantic/gitlab_semantic_blsi create mode 100644 classification_output/05/semantic/gitlab_semantic_blsmsk create mode 100644 classification_output/05/semantic/gitlab_semantic_bzhi create mode 100644 classification_output/05/vnc/11357571 create mode 100644 classification_output/05/vnc/33802194 create mode 100644 classification_output/05/vnc/42613410 create mode 100644 classification_output/05/vnc/80570214 (limited to 'classification_output') diff --git a/classification_output/01/README.md b/classification_output/01/README.md deleted file mode 100644 index b71a384d1..000000000 --- a/classification_output/01/README.md +++ /dev/null @@ -1,4 +0,0 @@ -- instruction: 13 -- mistranslation: 14 -- semantic: 8 -- other: 48 diff --git 
a/classification_output/01/instruction/0966902 b/classification_output/01/instruction/0966902 deleted file mode 100644 index 80cdabd29..000000000 --- a/classification_output/01/instruction/0966902 +++ /dev/null @@ -1,39 +0,0 @@ -instruction: 0.803 -semantic: 0.775 -mistranslation: 0.718 -other: 0.715 - -[Bug] "-ht" flag ignored under KVM - guest still reports HT - -Hi Community, -We have observed that the 'ht' feature bit cannot be disabled when QEMU runs -with KVM acceleration. -qemu-system-x86_64 \ - --enable-kvm \ - -machine q35 \ - -cpu host,-ht \ - -smp 4 \ - -m 4G \ - -drive file=rootfs.img,format=raw \ - -nographic \ - -append 'console=ttyS0 root=/dev/sda rw' -Because '-ht' is specified, the guest should expose no HT capability -(cpuid.1.edx[28] = 0), and /proc/cpuinfo shouldn't show HT feature, but we still -saw ht in linux guest when run 'cat /proc/cpuinfo'. -XiaoYao mentioned that: - -It has been the behavior of QEMU since - - commit 400281af34e5ee6aa9f5496b53d8f82c6fef9319 - Author: Andre Przywara - Date: Wed Aug 19 15:42:42 2009 +0200 - - set CPUID bits to present cores and threads topology - -that we cannot remove HT CPUID bit from guest via "-cpu xxx,-ht" if the -VM has >= 2 vcpus. -I'd like to know whether there's a plan to address this issue, or if the current -behaviour is considered acceptable. -Best regards, -Ewan. - diff --git a/classification_output/01/instruction/11357571 b/classification_output/01/instruction/11357571 new file mode 100644 index 000000000..1c3bc483f --- /dev/null +++ b/classification_output/01/instruction/11357571 @@ -0,0 +1,47 @@ +instruction: 0.758 +semantic: 0.694 +other: 0.687 +mistranslation: 0.516 + +[Qemu-devel] [BUG] VNC: client won't send FramebufferUpdateRequest if job in flight is aborted + +Hi Gerd, Daniel. + +We noticed that if VncSharePolicy was configured with +VNC_SHARE_POLICY_FORCE_SHARED mode and +multiple vnc clients opened vnc connections, some clients could go blank screen +at high probability. +This problem can be reproduced when we regularly reboot suse12sp3 in graphic +mode both +with RealVNC and noVNC client. + +Then we dig into it and find out that some clients go blank screen because they +don't +send FramebufferUpdateRequest any more. One step further, we notice that each +time +the job in flight is aborted one client go blank screen. + +The bug is triggered in the following procedure. +Guest reboot => graphic mode switch => graphic_hw_update => vga_update_display +=> vga_draw_graphic (full_update = 1) => dpy_gfx_replace_surface => +vnc_dpy_switch => +vnc_abort_display_jobs (client may have job in flight) => job removed from the +queue +If one client has vnc job in flight, *vnc_abort_display_jobs* will wait until +its job is abandoned. +This behavior is done in vnc_worker_thread_loop when 'if (job->vs->ioc == NULL +|| job->vs->abort == true)' +branch is taken. + +As we can see, *vnc_abort_display_jobs* is intended to do some optimization to +avoid unnecessary client update. +But if client sends FramebufferUpdateRequest for some graphic area and its +FramebufferUpdate response job +is abandoned, the client may wait for the response and never send new +FramebufferUpdateRequest, which may +case the client go blank screen forever. + +So I am wondering whether we should drop the *vnc_abort_display_jobs* +optimization or do some trick here +to push the client to send new FramebufferUpdateRequest. Do you have any idea ? 
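
[Editorial note] A minimal sketch of the client-side exchange discussed in the report above, to illustrate why a dropped FramebufferUpdate leaves the client idle. This is plain RFC 6143 message framing written in Python for illustration; it is not QEMU, RealVNC or noVNC code, and the socket `sock` is assumed to come from an already negotiated RFB connection.

import struct

def send_framebuffer_update_request(sock, x, y, width, height, incremental=True):
    # RFC 6143, 7.5.3: U8 type(3), U8 incremental, U16 x, U16 y, U16 width, U16 height
    sock.sendall(struct.pack(">BBHHHH", 3, 1 if incremental else 0,
                             x, y, width, height))

def wait_for_framebuffer_update(sock):
    # RFC 6143, 7.6.1: U8 type(0), U8 padding, U16 number-of-rectangles, rectangles...
    # A typical client blocks here; if the server silently abandons the queued
    # response (as described for vnc_abort_display_jobs), this read never
    # completes and the client never sends another FramebufferUpdateRequest.
    header = sock.recv(4)
    if len(header) < 4:
        raise ConnectionError("server closed connection")
    msg_type, _pad, nrects = struct.unpack(">BBH", header)
    return msg_type, nrects

The client loop is strictly request -> response -> request, so one dropped response stalls it permanently, matching the blank-screen symptom described above.
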
+ diff --git a/classification_output/01/instruction/11933524 b/classification_output/01/instruction/11933524 new file mode 100644 index 000000000..3ff255be0 --- /dev/null +++ b/classification_output/01/instruction/11933524 @@ -0,0 +1,1125 @@ +instruction: 0.775 +other: 0.771 +mistranslation: 0.719 +semantic: 0.673 + +[BUG] hw/i386/pc.c: CXL Fixed Memory Window should not reserve e820 in bios + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. + +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. + +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. + +Signed-off-by: Gregory Price +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; + + cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } +-- +2.37.3 + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. 
+ +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. + +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. + +Signed-off-by: Gregory Price +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; +cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +Or will this be subregion of cxl_base? + +Thanks, +Pankaj +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + +> +> - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> memory_region_add_subregion(system_memory, cxl_base, mr); +> +> cxl_resv_end = cxl_base + cxl_size; +> +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> memory_region_init_io(&fw->mr, OBJECT(machine), +> +> &cfmws_ops, fw, +> +> "cxl-fixed-memory-region", +> +> fw->size); +> +> memory_region_add_subregion(system_memory, fw->base, +> +> &fw->mr); +> +> +Or will this be subregion of cxl_base? +> +> +Thanks, +> +Pankaj +The memory region backing this memory area still has to be initialized +and added in the QEMU system, but it will now be initialized for use by +linux after PCI/ACPI setup occurs and the CXL driver discovers it via +CDAT. 
+ +It's also still possible to assign this area a static memory region at +bool by setting up the SRATs in the ACPI tables, but that patch is not +upstream yet. + +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price wrote: +> +> +Early-boot e820 records will be inserted by the bios/efi/early boot +> +software and be reported to the kernel via insert_resource. Later, when +> +CXL drivers iterate through the regions again, they will insert another +> +resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? + +> +> +This RESERVED memory area causes the memory region to become unusable, +> +and as a result attempting to create memory regions with +> +> +`cxl create-region ...` +> +> +Will fail due to the RESERVED area intersecting with the CXL window. +> +> +> +During boot the following traceback is observed: +> +> +0xffffffff81101650 in insert_resource_expand_to_fit () +> +0xffffffff83d964c5 in e820__reserve_resources_late () +> +0xffffffff83e03210 in pcibios_resource_survey () +> +0xffffffff83e04f4a in pcibios_init () +> +> +Which produces a call to reserve the CFMWS area: +> +> +(gdb) p *new +> +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +Later the Kernel parses ACPI tables and reserves the exact same area as +> +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> +retains the RESERVED region and makes it a child of the new region. +> +> +0xffffffff811016a4 in insert_resource_conflict () +> +insert_resource () +> +0xffffffff81a81389 in cxl_parse_cfmws () +> +0xffffffff818c4a81 in call_handler () +> +acpi_parse_entries_array () +> +> +(gdb) p/x *new +> +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +This produces the following output in /proc/iomem: +> +> +590000000-68fffffff : CXL Window 0 +> +590000000-68fffffff : Reserved +> +> +This reserved area causes `get_free_mem_region()` to fail due to a check +> +against `__region_intersects()`. Due to this reserved area, the +> +intersect check will only ever return REGION_INTERSECTS, which causes +> +`cxl create-region` to always fail. 
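
[Editorial note] A toy model of the resource-tree check described above, to show why a "Reserved" child inside "CXL Window 0" makes every free-range probe report an intersection. This is illustrative Python only, not the kernel's actual __region_intersects()/get_free_mem_region() code.

from dataclasses import dataclass, field

@dataclass
class Resource:
    start: int
    end: int
    name: str
    children: list = field(default_factory=list)

def intersects_reserved(res, start, end):
    # True if [start, end] overlaps any child resource of the window.
    for child in res.children:
        if child.start <= end and start <= child.end:
            return True
        if intersects_reserved(child, start, end):
            return True
    return False

# /proc/iomem layout from the report: the whole CXL window has a Reserved child.
window = Resource(0x590000000, 0x68fffffff, "CXL Window 0",
                  [Resource(0x590000000, 0x68fffffff, "Reserved")])

# Any candidate range inside the window reports an intersection, so region
# creation can never find free space -- hence `cxl create-region` always fails.
print(intersects_reserved(window, 0x590000000, 0x590000000 + (1 << 28) - 1))  # True
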
+> +> +Signed-off-by: Gregory Price +> +--- +> +hw/i386/pc.c | 2 -- +> +1 file changed, 2 deletions(-) +> +> +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +index 566accf7e6..5bf5465a21 100644 +> +--- a/hw/i386/pc.c +> ++++ b/hw/i386/pc.c +> +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +hwaddr cxl_size = MiB; +> +> +cxl_base = pc_get_cxl_range_start(pcms); +> +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +memory_region_add_subregion(system_memory, cxl_base, mr); +> +cxl_resv_end = cxl_base + cxl_size; +> +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +> +fw, +> +"cxl-fixed-memory-region", fw->size); +> +memory_region_add_subregion(system_memory, fw->base, +> +&fw->mr); +> +- e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +cxl_fmw_base += fw->size; +> +cxl_resv_end = cxl_fmw_base; +> +} +> +-- +> +2.37.3 +> + +This patch does not resolve the issue, reserved entries are still created. +[    0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +[    0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +# cat /proc/iomem +290000000-29fffffff : CXL Window 0 +  290000000-29fffffff : Reserved +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +cxl region: create_region: region0: set_size failed: Numerical result out of range +cxl region: cmd_create_region: created 0 regions +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha < +ani@anisinha.ca +> wrote: +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price < +gourry.memverge@gmail.com +> wrote: +> +> Early-boot e820 records will be inserted by the bios/efi/early boot +> software and be reported to the kernel via insert_resource.  Later, when +> CXL drivers iterate through the regions again, they will insert another +> resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? +> +> This RESERVED memory area causes the memory region to become unusable, +> and as a result attempting to create memory regions with +> +>     `cxl create-region ...` +> +> Will fail due to the RESERVED area intersecting with the CXL window. +> +> +> During boot the following traceback is observed: +> +> 0xffffffff81101650 in insert_resource_expand_to_fit () +> 0xffffffff83d964c5 in e820__reserve_resources_late () +> 0xffffffff83e03210 in pcibios_resource_survey () +> 0xffffffff83e04f4a in pcibios_init () +> +> Which produces a call to reserve the CFMWS area: +> +> (gdb) p *new +> $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +>        flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +>        child = 0x0} +> +> Later the Kernel parses ACPI tables and reserves the exact same area as +> the CXL Fixed Memory Window.  The use of `insert_resource_conflict` +> retains the RESERVED region and makes it a child of the new region. 
+> +> 0xffffffff811016a4 in insert_resource_conflict () +>                       insert_resource () +> 0xffffffff81a81389 in cxl_parse_cfmws () +> 0xffffffff818c4a81 in call_handler () +>                       acpi_parse_entries_array () +> +> (gdb) p/x *new +> $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +>        flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +>        child = 0x0} +> +> This produces the following output in /proc/iomem: +> +> 590000000-68fffffff : CXL Window 0 +>   590000000-68fffffff : Reserved +> +> This reserved area causes `get_free_mem_region()` to fail due to a check +> against `__region_intersects()`.  Due to this reserved area, the +> intersect check will only ever return REGION_INTERSECTS, which causes +> `cxl create-region` to always fail. +> +> Signed-off-by: Gregory Price < +gregory.price@memverge.com +> +> --- +>  hw/i386/pc.c | 2 -- +>  1 file changed, 2 deletions(-) +> +> diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> index 566accf7e6..5bf5465a21 100644 +> --- a/hw/i386/pc.c +> +++ b/hw/i386/pc.c +> @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +>          hwaddr cxl_size = MiB; +> +>          cxl_base = pc_get_cxl_range_start(pcms); +> -        e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +>          memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +>          memory_region_add_subregion(system_memory, cxl_base, mr); +>          cxl_resv_end = cxl_base + cxl_size; +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +>                  memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, +>                                        "cxl-fixed-memory-region", fw->size); +>                  memory_region_add_subregion(system_memory, fw->base, &fw->mr); +> -                e820_add_entry(fw->base, fw->size, E820_RESERVED); +>                  cxl_fmw_base += fw->size; +>                  cxl_resv_end = cxl_fmw_base; +>              } +> -- +> 2.37.3 +> + ++Gerd Hoffmann + +On Tue, Oct 18, 2022 at 8:16 PM Gregory Price wrote: +> +> +This patch does not resolve the issue, reserved entries are still created. +> +> +[ 0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +> +[ 0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +> +> +# cat /proc/iomem +> +290000000-29fffffff : CXL Window 0 +> +290000000-29fffffff : Reserved +> +> +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +> +cxl region: create_region: region0: set_size failed: Numerical result out of +> +range +> +cxl region: cmd_create_region: created 0 regions +> +> +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha wrote: +> +> +> +> On Tue, Oct 18, 2022 at 5:14 AM Gregory Price +> +> wrote: +> +> > +> +> > Early-boot e820 records will be inserted by the bios/efi/early boot +> +> > software and be reported to the kernel via insert_resource. Later, when +> +> > CXL drivers iterate through the regions again, they will insert another +> +> > resource and make the RESERVED memory area a child. +> +> +> +> I have already sent a patch +> +> +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +> +> When the patch is applied, there would not be any reserved entries +> +> even with passing E820_RESERVED . +> +> So this patch needs to be evaluated in the light of the above patch I +> +> sent. Once you apply my patch, does the issue still exist? 
+> +> +> +> > +> +> > This RESERVED memory area causes the memory region to become unusable, +> +> > and as a result attempting to create memory regions with +> +> > +> +> > `cxl create-region ...` +> +> > +> +> > Will fail due to the RESERVED area intersecting with the CXL window. +> +> > +> +> > +> +> > During boot the following traceback is observed: +> +> > +> +> > 0xffffffff81101650 in insert_resource_expand_to_fit () +> +> > 0xffffffff83d964c5 in e820__reserve_resources_late () +> +> > 0xffffffff83e03210 in pcibios_resource_survey () +> +> > 0xffffffff83e04f4a in pcibios_init () +> +> > +> +> > Which produces a call to reserve the CFMWS area: +> +> > +> +> > (gdb) p *new +> +> > $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +> > flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > Later the Kernel parses ACPI tables and reserves the exact same area as +> +> > the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> +> > retains the RESERVED region and makes it a child of the new region. +> +> > +> +> > 0xffffffff811016a4 in insert_resource_conflict () +> +> > insert_resource () +> +> > 0xffffffff81a81389 in cxl_parse_cfmws () +> +> > 0xffffffff818c4a81 in call_handler () +> +> > acpi_parse_entries_array () +> +> > +> +> > (gdb) p/x *new +> +> > $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +> > flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > This produces the following output in /proc/iomem: +> +> > +> +> > 590000000-68fffffff : CXL Window 0 +> +> > 590000000-68fffffff : Reserved +> +> > +> +> > This reserved area causes `get_free_mem_region()` to fail due to a check +> +> > against `__region_intersects()`. Due to this reserved area, the +> +> > intersect check will only ever return REGION_INTERSECTS, which causes +> +> > `cxl create-region` to always fail. +> +> > +> +> > Signed-off-by: Gregory Price +> +> > --- +> +> > hw/i386/pc.c | 2 -- +> +> > 1 file changed, 2 deletions(-) +> +> > +> +> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > index 566accf7e6..5bf5465a21 100644 +> +> > --- a/hw/i386/pc.c +> +> > +++ b/hw/i386/pc.c +> +> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > hwaddr cxl_size = MiB; +> +> > +> +> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> > memory_region_add_subregion(system_memory, cxl_base, mr); +> +> > cxl_resv_end = cxl_base + cxl_size; +> +> > @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > memory_region_init_io(&fw->mr, OBJECT(machine), +> +> > &cfmws_ops, fw, +> +> > "cxl-fixed-memory-region", +> +> > fw->size); +> +> > memory_region_add_subregion(system_memory, fw->base, +> +> > &fw->mr); +> +> > - e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +> > cxl_fmw_base += fw->size; +> +> > cxl_resv_end = cxl_fmw_base; +> +> > } +> +> > -- +> +> > 2.37.3 +> +> > + +> +>> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +>> > index 566accf7e6..5bf5465a21 100644 +> +>> > --- a/hw/i386/pc.c +> +>> > +++ b/hw/i386/pc.c +> +>> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +>> > hwaddr cxl_size = MiB; +> +>> > +> +>> > cxl_base = pc_get_cxl_range_start(pcms); +> +>> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +Just dropping it doesn't look like a good plan to me. 
+ +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +window is placed above that address, i.e. this effectively reserves +address space. Right now used by memory hotplug code, but should work +for cxl too I think (disclaimer: don't know much about cxl ...). + +take care & HTH, + Gerd + +On Tue, 8 Nov 2022 12:21:11 +0100 +Gerd Hoffmann wrote: + +> +> >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> >> > index 566accf7e6..5bf5465a21 100644 +> +> >> > --- a/hw/i386/pc.c +> +> >> > +++ b/hw/i386/pc.c +> +> >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> >> > hwaddr cxl_size = MiB; +> +> >> > +> +> >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +Just dropping it doesn't look like a good plan to me. +> +> +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +window is placed above that address, i.e. this effectively reserves +> +address space. Right now used by memory hotplug code, but should work +> +for cxl too I think (disclaimer: don't know much about cxl ...). +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +at all, it' has its own mapping. + +Regardless of that, reserved E820 entries look wrong, and looking at +commit message OS is right to bailout on them (expected according +to ACPI spec). +Also spec says + +" +E820 Assumptions and Limitations + [...] + The platform boot firmware does not return a range description for the memory +mapping of + PCI devices, ISA Option ROMs, and ISA Plug and Play cards because the OS has +mechanisms + available to detect them. +" + +so dropping reserved entries looks reasonable from ACPI spec point of view. +(disclaimer: don't know much about cxl ... either) +> +> +take care & HTH, +> +Gerd +> + +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +On Tue, 8 Nov 2022 12:21:11 +0100 +> +Gerd Hoffmann wrote: +> +> +> > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > >> > index 566accf7e6..5bf5465a21 100644 +> +> > >> > --- a/hw/i386/pc.c +> +> > >> > +++ b/hw/i386/pc.c +> +> > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > >> > hwaddr cxl_size = MiB; +> +> > >> > +> +> > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +> +> Just dropping it doesn't look like a good plan to me. +> +> +> +> You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> window is placed above that address, i.e. this effectively reserves +> +> address space. Right now used by memory hotplug code, but should work +> +> for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +at all, it' has its own mapping. +This should be changed. cxl should make sure the highest address used +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +resources there. + +> +so dropping reserved entries looks reasonable from ACPI spec point of view. +Yep, I don't want dispute that. 
+ +I suspect the reason for these entries to exist in the first place is to +inform the firmware that it should not place stuff there, and if we +remove that to conform with the spec we need some alternative way for +that ... + +take care, + Gerd + +On Fri, 11 Nov 2022 12:40:59 +0100 +Gerd Hoffmann wrote: + +> +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +> On Tue, 8 Nov 2022 12:21:11 +0100 +> +> Gerd Hoffmann wrote: +> +> +> +> > > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > > >> > index 566accf7e6..5bf5465a21 100644 +> +> > > >> > --- a/hw/i386/pc.c +> +> > > >> > +++ b/hw/i386/pc.c +> +> > > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > > >> > hwaddr cxl_size = MiB; +> +> > > >> > +> +> > > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > +> +> > Just dropping it doesn't look like a good plan to me. +> +> > +> +> > You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> > (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> > window is placed above that address, i.e. this effectively reserves +> +> > address space. Right now used by memory hotplug code, but should work +> +> > for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +> +> As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +> at all, it' has its own mapping. +> +> +This should be changed. cxl should make sure the highest address used +> +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +> +resources there. +if (pcmc->has_reserved_memory && machine->device_memory->base) { + +[...] + + if (pcms->cxl_devices_state.is_enabled) { + + res_mem_end = cxl_resv_end; + +that should be handled by this line + + } + + *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); + + fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); + + } + +so SeaBIOS shouldn't intrude into CXL address space +(I assume EDK2 behave similarly here) + +> +> so dropping reserved entries looks reasonable from ACPI spec point of view. +> +> +> +> +Yep, I don't want dispute that. +> +> +I suspect the reason for these entries to exist in the first place is to +> +inform the firmware that it should not place stuff there, and if we +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +just to educate me, can you point out what SeaBIOS code does with reservations. + +> +remove that to conform with the spec we need some alternative way for +> +that ... +with etc/reserved-memory-end set as above, +is E820_RESERVED really needed here? + +(my understanding was that E820_RESERVED weren't accounted for when +initializing PCI devices) + +> +> +take care, +> +Gerd +> + +> +if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +[...] +> +> +if (pcms->cxl_devices_state.is_enabled) { +> +> +res_mem_end = cxl_resv_end; +> +> +that should be handled by this line +> +> +} +> +> +*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +sizeof(*val)); +> +} +> +> +so SeaBIOS shouldn't intrude into CXL address space +Yes, looks good, so with this in place already everyting should be fine. + +> +(I assume EDK2 behave similarly here) +Correct, ovmf reads that fw_cfg file too. 
+ +> +> I suspect the reason for these entries to exist in the first place is to +> +> inform the firmware that it should not place stuff there, and if we +> +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +just to educate me, can you point out what SeaBIOS code does with +> +reservations. +They are added to the e820 map which gets passed on to the OS. seabios +uses (and updateas) the e820 map too, when allocating memory for +example. While thinking about it I'm not fully sure it actually looks +at reservations, maybe it only uses (and updates) ram entries when +allocating memory. + +> +> remove that to conform with the spec we need some alternative way for +> +> that ... +> +> +with etc/reserved-memory-end set as above, +> +is E820_RESERVED really needed here? +No. Setting etc/reserved-memory-end is enough. + +So for the original patch: +Acked-by: Gerd Hoffmann + +take care, + Gerd + +On Fri, Nov 11, 2022 at 02:36:02PM +0100, Gerd Hoffmann wrote: +> +> if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +> +> [...] +> +> +> +> if (pcms->cxl_devices_state.is_enabled) { +> +> +> +> res_mem_end = cxl_resv_end; +> +> +> +> that should be handled by this line +> +> +> +> } +> +> +> +> *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +> +> fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +> sizeof(*val)); +> +> } +> +> +> +> so SeaBIOS shouldn't intrude into CXL address space +> +> +Yes, looks good, so with this in place already everyting should be fine. +> +> +> (I assume EDK2 behave similarly here) +> +> +Correct, ovmf reads that fw_cfg file too. +> +> +> > I suspect the reason for these entries to exist in the first place is to +> +> > inform the firmware that it should not place stuff there, and if we +> +> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +> just to educate me, can you point out what SeaBIOS code does with +> +> reservations. +> +> +They are added to the e820 map which gets passed on to the OS. seabios +> +uses (and updateas) the e820 map too, when allocating memory for +> +example. While thinking about it I'm not fully sure it actually looks +> +at reservations, maybe it only uses (and updates) ram entries when +> +allocating memory. +> +> +> > remove that to conform with the spec we need some alternative way for +> +> > that ... +> +> +> +> with etc/reserved-memory-end set as above, +> +> is E820_RESERVED really needed here? +> +> +No. Setting etc/reserved-memory-end is enough. +> +> +So for the original patch: +> +Acked-by: Gerd Hoffmann +> +> +take care, +> +Gerd +It's upstream already, sorry I can't add your tag. + +-- +MST + diff --git a/classification_output/01/instruction/24190340 b/classification_output/01/instruction/24190340 new file mode 100644 index 000000000..784962c9c --- /dev/null +++ b/classification_output/01/instruction/24190340 @@ -0,0 +1,2056 @@ +instruction: 0.818 +other: 0.811 +semantic: 0.793 +mistranslation: 0.758 + +[BUG, RFC] Block graph deadlock on job-dismiss + +Hi all, + +There's a bug in block layer which leads to block graph deadlock. +Notably, it takes place when blockdev IO is processed within a separate +iothread. + +This was initially caught by our tests, and I was able to reduce it to a +relatively simple reproducer. Such deadlocks are probably supposed to +be covered in iotests/graph-changes-while-io, but this deadlock isn't. 
+ +Basically what the reproducer does is launches QEMU with a drive having +'iothread' option set, creates a chain of 2 snapshots, launches +block-commit job for a snapshot and then dismisses the job, starting +from the lower snapshot. If the guest is issuing IO at the same time, +there's a race in acquiring block graph lock and a potential deadlock. + +Here's how it can be reproduced: + +1. Run QEMU: +> +SRCDIR=/path/to/srcdir +> +> +> +> +> +$SRCDIR/build/qemu-system-x86_64 -enable-kvm \ +> +> +-machine q35 -cpu Nehalem \ +> +> +-name guest=alma8-vm,debug-threads=on \ +> +> +-m 2g -smp 2 \ +> +> +-nographic -nodefaults \ +> +> +-qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ +> +> +-serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ +> +> +-object iothread,id=iothread0 \ +> +> +-blockdev +> +node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 +> +\ +> +-device virtio-blk-pci,drive=disk,iothread=iothread0 +2. Launch IO (random reads) from within the guest: +> +nc -U /var/run/alma8-serial.sock +> +... +> +[root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 --bs=4k +> +--size=1G --numjobs=1 --time_based=1 --runtime=300 --group_reporting +> +--rw=randread --iodepth=1 --filename=/testfile +3. Run snapshots creation & removal of lower snapshot operation in a +loop (script attached): +> +while /bin/true ; do ./remove_lower_snap.sh ; done +And then it occasionally hangs. + +Note: I've tried bisecting this, and looks like deadlock occurs starting +from the following commit: + +(BAD) 5bdbaebcce virtio: Re-enable notifications after drain +(GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll + +On the latest v10.0.0 it does hang as well. + + +Here's backtrace of the main thread: + +> +#0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, +> +timeout=, sigmask=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:43 +> +#1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, +> +timeout=-1) at ../util/qemu-timer.c:329 +> +#2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, +> +ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 +> +#3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) at +> +../util/aio-posix.c:730 +> +#4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, +> +parent=0x0, poll=true) at ../block/io.c:378 +> +#5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at +> +../block/io.c:391 +> +#6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7682 +> +#7 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7964250, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7608 +> +#8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7668 +> +#9 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7e59110, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7608 +> +#10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7668 +> +#11 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb814ed80, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 
= {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7608 +> +#12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../blockjob.c:157 +> +#13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb7c9d3f0, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7592 +> +#14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7661 +> +#15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx +> +(child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = +> +{...}, tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 +> +#16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb8565af0, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7592 +> +#17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, +> +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +errp=0x0) +> +at ../block.c:7661 +> +#18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context (bs=0x557eb79575e0, +> +ctx=0x557eb76c5f20, ignore_child=0x0, errp=0x0) at ../block.c:7715 +> +#19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) at +> +../block.c:3317 +> +#20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv (job=0x557eb7952800) at +> +../blockjob.c:209 +> +#21 0x0000557eb45ee641 in block_job_free (job=0x557eb7952800) at +> +../blockjob.c:82 +> +#22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at +> +../job.c:474 +> +#23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at +> +../job.c:771 +> +#24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, +> +errp=0x7ffd94b4f488) at ../job.c:783 +> +--Type for more, q to quit, c to continue without paging-- +> +#25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 "commit-snap1", +> +errp=0x7ffd94b4f488) at ../job-qmp.c:138 +> +#26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, +> +ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 +> +#27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at +> +../qapi/qmp-dispatch.c:128 +> +#28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at +> +../util/async.c:172 +> +#29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at +> +../util/async.c:219 +> +#30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at +> +../util/aio-posix.c:436 +> +#31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, +> +callback=0x0, user_data=0x0) at ../util/async.c:361 +> +#32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at +> +../glib/gmain.c:3364 +> +#33 g_main_context_dispatch (context=0x557eb76c6430) at ../glib/gmain.c:4079 +> +#34 0x0000557eb47d3ab1 in glib_pollfds_poll () at ../util/main-loop.c:287 +> +#35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at +> +../util/main-loop.c:310 +> +#36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at +> +../util/main-loop.c:589 +> +#37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 +> +#38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at +> +../system/main.c:50 +> +#39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at +> +../system/main.c:80 +And here's coroutine trying to acquire read lock: + 
+> +(gdb) qemu coroutine reader_queue->entries.sqh_first +> +#0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, +> +to_=0x7fc537fff508, action=COROUTINE_YIELD) at +> +../util/coroutine-ucontext.c:321 +> +#1 0x0000557eb47d4d4a in qemu_coroutine_yield () at +> +../util/qemu-coroutine.c:339 +> +#2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 +> +, lock=0x7fc53c57de50, flags=0) at +> +../util/qemu-coroutine-lock.c:60 +> +#3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at ../block/graph-lock.c:231 +> +#4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) at +> +/home/root/src/qemu/master/include/block/graph-lock.h:213 +> +#5 0x0000557eb460fa41 in blk_co_do_preadv_part +> +(blk=0x557eb84c0810, offset=6890553344, bytes=4096, qiov=0x7fc530006988, +> +qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at ../block/block-backend.c:1339 +> +#6 0x0000557eb46104d7 in blk_aio_read_entry (opaque=0x7fc530003240) at +> +../block/block-backend.c:1619 +> +#7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, i1=21886) at +> +../util/coroutine-ucontext.c:175 +> +#8 0x00007fc547c2a360 in __start_context () at +> +../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 +> +#9 0x00007ffd94b4ea40 in () +> +#10 0x0000000000000000 in () +So it looks like main thread is processing job-dismiss request and is +holding write lock taken in block_job_remove_all_bdrv() (frame #20 +above). At the same time iothread spawns a coroutine which performs IO +request. Before the coroutine is spawned, blk_aio_prwv() increases +'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is +trying to acquire the read lock. But main thread isn't releasing the +lock as blk_root_drained_poll() returns true since blk->in_flight > 0. +Here's the deadlock. + +Any comments and suggestions on the subject are welcomed. Thanks! + +Andrey +remove_lower_snap.sh +Description: +application/shellscript + +On 4/24/25 8:32 PM, Andrey Drobyshev wrote: +> +Hi all, +> +> +There's a bug in block layer which leads to block graph deadlock. +> +Notably, it takes place when blockdev IO is processed within a separate +> +iothread. +> +> +This was initially caught by our tests, and I was able to reduce it to a +> +relatively simple reproducer. Such deadlocks are probably supposed to +> +be covered in iotests/graph-changes-while-io, but this deadlock isn't. +> +> +Basically what the reproducer does is launches QEMU with a drive having +> +'iothread' option set, creates a chain of 2 snapshots, launches +> +block-commit job for a snapshot and then dismisses the job, starting +> +from the lower snapshot. If the guest is issuing IO at the same time, +> +there's a race in acquiring block graph lock and a potential deadlock. +> +> +Here's how it can be reproduced: +> +> +[...] +> +I took a closer look at iotests/graph-changes-while-io, and have managed +to reproduce the same deadlock in a much simpler setup, without a guest. + +1. Run QSD:> ./build/storage-daemon/qemu-storage-daemon --object +iothread,id=iothread0 \ +> +--blockdev null-co,node-name=node0,read-zeroes=true \ +> +> +--nbd-server addr.type=unix,addr.path=/var/run/qsd_nbd.sock \ +> +> +--export +> +nbd,id=exp0,node-name=node0,iothread=iothread0,fixed-iothread=true,writable=true +> +\ +> +--chardev +> +socket,id=qmp-sock,path=/var/run/qsd_qmp.sock,server=on,wait=off \ +> +--monitor chardev=qmp-sock +2. Launch IO: +> +qemu-img bench -f raw -c 2000000 +> +'nbd+unix:///node0?socket=/var/run/qsd_nbd.sock' +3. 
Add 2 snapshots and remove lower one (script attached):> while +/bin/true ; do ./rls_qsd.sh ; done + +And then it hangs. + +I'll also send a patch with corresponding test case added directly to +iotests. + +This reproduce seems to be hanging starting from Fiona's commit +67446e605dc ("blockjob: drop AioContext lock before calling +bdrv_graph_wrlock()"). AioContext locks were dropped entirely later on +in Stefan's commit b49f4755c7 ("block: remove AioContext locking"), but +the problem remains. + +Andrey +rls_qsd.sh +Description: +application/shellscript + +From: Andrey Drobyshev + +This case is catching potential deadlock which takes place when job-dismiss +is issued when I/O requests are processed in a separate iothread. + +See +https://mail.gnu.org/archive/html/qemu-devel/2025-04/msg04421.html +Signed-off-by: Andrey Drobyshev +--- + .../qemu-iotests/tests/graph-changes-while-io | 101 ++++++++++++++++-- + .../tests/graph-changes-while-io.out | 4 +- + 2 files changed, 96 insertions(+), 9 deletions(-) + +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io +b/tests/qemu-iotests/tests/graph-changes-while-io +index 194fda500e..e30f823da4 100755 +--- a/tests/qemu-iotests/tests/graph-changes-while-io ++++ b/tests/qemu-iotests/tests/graph-changes-while-io +@@ -27,6 +27,8 @@ from iotests import imgfmt, qemu_img, qemu_img_create, +qemu_io, \ + + + top = os.path.join(iotests.test_dir, 'top.img') ++snap1 = os.path.join(iotests.test_dir, 'snap1.img') ++snap2 = os.path.join(iotests.test_dir, 'snap2.img') + nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') + + +@@ -58,6 +60,15 @@ class TestGraphChangesWhileIO(QMPTestCase): + def tearDown(self) -> None: + self.qsd.stop() + ++ def _wait_for_blockjob(self, status) -> None: ++ done = False ++ while not done: ++ for event in self.qsd.get_qmp().get_events(wait=10.0): ++ if event['event'] != 'JOB_STATUS_CHANGE': ++ continue ++ if event['data']['status'] == status: ++ done = True ++ + def test_blockdev_add_while_io(self) -> None: + # Run qemu-img bench in the background + bench_thr = Thread(target=do_qemu_img_bench) +@@ -116,13 +127,89 @@ class TestGraphChangesWhileIO(QMPTestCase): + 'device': 'job0', + }) + +- cancelled = False +- while not cancelled: +- for event in self.qsd.get_qmp().get_events(wait=10.0): +- if event['event'] != 'JOB_STATUS_CHANGE': +- continue +- if event['data']['status'] == 'null': +- cancelled = True ++ self._wait_for_blockjob('null') ++ ++ bench_thr.join() ++ ++ def test_remove_lower_snapshot_while_io(self) -> None: ++ # Run qemu-img bench in the background ++ bench_thr = Thread(target=do_qemu_img_bench, args=(100000, )) ++ bench_thr.start() ++ ++ # While I/O is performed on 'node0' node, consequently add 2 snapshots ++ # on top of it, then remove (commit) them starting from lower one. 
++ while bench_thr.is_alive(): ++ # Recreate snapshot images on every iteration ++ qemu_img_create('-f', imgfmt, snap1, '1G') ++ qemu_img_create('-f', imgfmt, snap2, '1G') ++ ++ self.qsd.cmd('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'snap1', ++ 'file': { ++ 'driver': 'file', ++ 'filename': snap1 ++ } ++ }) ++ ++ self.qsd.cmd('blockdev-snapshot', { ++ 'node': 'node0', ++ 'overlay': 'snap1', ++ }) ++ ++ self.qsd.cmd('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'snap2', ++ 'file': { ++ 'driver': 'file', ++ 'filename': snap2 ++ } ++ }) ++ ++ self.qsd.cmd('blockdev-snapshot', { ++ 'node': 'snap1', ++ 'overlay': 'snap2', ++ }) ++ ++ self.qsd.cmd('block-commit', { ++ 'job-id': 'commit-snap1', ++ 'device': 'snap2', ++ 'top-node': 'snap1', ++ 'base-node': 'node0', ++ 'auto-finalize': True, ++ 'auto-dismiss': False, ++ }) ++ ++ self._wait_for_blockjob('concluded') ++ self.qsd.cmd('job-dismiss', { ++ 'id': 'commit-snap1', ++ }) ++ ++ self.qsd.cmd('block-commit', { ++ 'job-id': 'commit-snap2', ++ 'device': 'snap2', ++ 'top-node': 'snap2', ++ 'base-node': 'node0', ++ 'auto-finalize': True, ++ 'auto-dismiss': False, ++ }) ++ ++ self._wait_for_blockjob('ready') ++ self.qsd.cmd('job-complete', { ++ 'id': 'commit-snap2', ++ }) ++ ++ self._wait_for_blockjob('concluded') ++ self.qsd.cmd('job-dismiss', { ++ 'id': 'commit-snap2', ++ }) ++ ++ self.qsd.cmd('blockdev-del', { ++ 'node-name': 'snap1' ++ }) ++ self.qsd.cmd('blockdev-del', { ++ 'node-name': 'snap2' ++ }) + + bench_thr.join() + +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out +b/tests/qemu-iotests/tests/graph-changes-while-io.out +index fbc63e62f8..8d7e996700 100644 +--- a/tests/qemu-iotests/tests/graph-changes-while-io.out ++++ b/tests/qemu-iotests/tests/graph-changes-while-io.out +@@ -1,5 +1,5 @@ +-.. ++... + ---------------------------------------------------------------------- +-Ran 2 tests ++Ran 3 tests + + OK +-- +2.43.5 + +Am 24.04.25 um 19:32 schrieb Andrey Drobyshev: +> +So it looks like main thread is processing job-dismiss request and is +> +holding write lock taken in block_job_remove_all_bdrv() (frame #20 +> +above). At the same time iothread spawns a coroutine which performs IO +> +request. Before the coroutine is spawned, blk_aio_prwv() increases +> +'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is +> +trying to acquire the read lock. But main thread isn't releasing the +> +lock as blk_root_drained_poll() returns true since blk->in_flight > 0. +> +Here's the deadlock. +And for the IO test you provided, it's client->nb_requests that behaves +similarly to blk->in_flight here. + +The issue also reproduces easily when issuing the following QMP command +in a loop while doing IO on a device: + +> +void qmp_block_locked_drain(const char *node_name, Error **errp) +> +{ +> +BlockDriverState *bs; +> +> +bs = bdrv_find_node(node_name); +> +if (!bs) { +> +error_setg(errp, "node not found"); +> +return; +> +} +> +> +bdrv_graph_wrlock(); +> +bdrv_drained_begin(bs); +> +bdrv_drained_end(bs); +> +bdrv_graph_wrunlock(); +> +} +It seems like either it would be necessary to require: +1. not draining inside an exclusively locked section +or +2. making sure that variables used by drained_poll routines are only set +while holding the reader lock +? 
+ +Those seem to require rather involved changes, so a third option might +be to make draining inside an exclusively locked section possible, by +embedding such locked sections in a drained section: + +> +diff --git a/blockjob.c b/blockjob.c +> +index 32007f31a9..9b2f3b3ea9 100644 +> +--- a/blockjob.c +> ++++ b/blockjob.c +> +@@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) +> +* one to make sure that such a concurrent access does not attempt +> +* to process an already freed BdrvChild. +> +*/ +> ++ bdrv_drain_all_begin(); +> +bdrv_graph_wrlock(); +> +while (job->nodes) { +> +GSList *l = job->nodes; +> +@@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) +> +g_slist_free_1(l); +> +} +> +bdrv_graph_wrunlock(); +> ++ bdrv_drain_all_end(); +> +} +> +> +bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +This seems to fix the issue at hand. I can send a patch if this is +considered an acceptable approach. + +Best Regards, +Fiona + +On 4/30/25 11:47 AM, Fiona Ebner wrote: +> +Am 24.04.25 um 19:32 schrieb Andrey Drobyshev: +> +> So it looks like main thread is processing job-dismiss request and is +> +> holding write lock taken in block_job_remove_all_bdrv() (frame #20 +> +> above). At the same time iothread spawns a coroutine which performs IO +> +> request. Before the coroutine is spawned, blk_aio_prwv() increases +> +> 'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is +> +> trying to acquire the read lock. But main thread isn't releasing the +> +> lock as blk_root_drained_poll() returns true since blk->in_flight > 0. +> +> Here's the deadlock. +> +> +And for the IO test you provided, it's client->nb_requests that behaves +> +similarly to blk->in_flight here. +> +> +The issue also reproduces easily when issuing the following QMP command +> +in a loop while doing IO on a device: +> +> +> void qmp_block_locked_drain(const char *node_name, Error **errp) +> +> { +> +> BlockDriverState *bs; +> +> +> +> bs = bdrv_find_node(node_name); +> +> if (!bs) { +> +> error_setg(errp, "node not found"); +> +> return; +> +> } +> +> +> +> bdrv_graph_wrlock(); +> +> bdrv_drained_begin(bs); +> +> bdrv_drained_end(bs); +> +> bdrv_graph_wrunlock(); +> +> } +> +> +It seems like either it would be necessary to require: +> +1. not draining inside an exclusively locked section +> +or +> +2. making sure that variables used by drained_poll routines are only set +> +while holding the reader lock +> +? +> +> +Those seem to require rather involved changes, so a third option might +> +be to make draining inside an exclusively locked section possible, by +> +embedding such locked sections in a drained section: +> +> +> diff --git a/blockjob.c b/blockjob.c +> +> index 32007f31a9..9b2f3b3ea9 100644 +> +> --- a/blockjob.c +> +> +++ b/blockjob.c +> +> @@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) +> +> * one to make sure that such a concurrent access does not attempt +> +> * to process an already freed BdrvChild. +> +> */ +> +> + bdrv_drain_all_begin(); +> +> bdrv_graph_wrlock(); +> +> while (job->nodes) { +> +> GSList *l = job->nodes; +> +> @@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) +> +> g_slist_free_1(l); +> +> } +> +> bdrv_graph_wrunlock(); +> +> + bdrv_drain_all_end(); +> +> } +> +> +> +> bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +> +> +This seems to fix the issue at hand. I can send a patch if this is +> +considered an acceptable approach. 
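
[Editorial note] A toy threading model of the ordering question discussed here, under the assumptions spelled out in the thread: the in-flight counter is bumped before the reader lock is taken, and draining polls until the counter drops to zero. It is not QEMU code; it only shows why draining first lets pending requests finish, whereas taking the writer lock first can leave a request stuck between the two steps.

import threading

class GraphLock:
    # Minimal reader/writer lock standing in for the block-graph lock.
    def __init__(self):
        self.cond = threading.Condition()
        self.readers = 0
        self.writer = False
    def rdlock(self):
        with self.cond:
            while self.writer:
                self.cond.wait()
            self.readers += 1
    def rdunlock(self):
        with self.cond:
            self.readers -= 1
            self.cond.notify_all()
    def wrlock(self):
        with self.cond:
            while self.writer or self.readers:
                self.cond.wait()
            self.writer = True
    def wrunlock(self):
        with self.cond:
            self.writer = False
            self.cond.notify_all()

lock = GraphLock()
in_flight = 0
in_flight_cv = threading.Condition()

def io_request():
    # Like the described blk_aio_prwv() path: counter up *before* the reader lock.
    global in_flight
    with in_flight_cv:
        in_flight += 1
    lock.rdlock()
    lock.rdunlock()
    with in_flight_cv:
        in_flight -= 1
        in_flight_cv.notify_all()

def drain():
    # Like the described drained_poll: wait until no request is in flight.
    with in_flight_cv:
        while in_flight:
            in_flight_cv.wait()

# Fixed ordering (as in the patch sketched above): drain, then take the writer
# lock. Requests started before the drain can still grab the reader lock and
# finish, so the drain terminates.
t = threading.Thread(target=io_request)
t.start()
drain()
lock.wrlock()
# ... graph change would happen here ...
lock.wrunlock()
t.join()
print("no deadlock with drain-before-wrlock")
# Reversing the two steps (wrlock first, then drain) can hang: a request that
# has already bumped in_flight blocks on rdlock, so drain() never returns.
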
+> +> +Best Regards, +> +Fiona +> +Hello Fiona, + +Thanks for looking into it. I've tried your 3rd option above and can +confirm it does fix the deadlock, at least I can't reproduce it. Other +iotests also don't seem to be breaking. So I personally am fine with +that patch. Would be nice to hear a word from the maintainers though on +whether there're any caveats with such approach. + +Andrey + +On Wed, Apr 30, 2025 at 10:11 AM Andrey Drobyshev + wrote: +> +> +On 4/30/25 11:47 AM, Fiona Ebner wrote: +> +> Am 24.04.25 um 19:32 schrieb Andrey Drobyshev: +> +>> So it looks like main thread is processing job-dismiss request and is +> +>> holding write lock taken in block_job_remove_all_bdrv() (frame #20 +> +>> above). At the same time iothread spawns a coroutine which performs IO +> +>> request. Before the coroutine is spawned, blk_aio_prwv() increases +> +>> 'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is +> +>> trying to acquire the read lock. But main thread isn't releasing the +> +>> lock as blk_root_drained_poll() returns true since blk->in_flight > 0. +> +>> Here's the deadlock. +> +> +> +> And for the IO test you provided, it's client->nb_requests that behaves +> +> similarly to blk->in_flight here. +> +> +> +> The issue also reproduces easily when issuing the following QMP command +> +> in a loop while doing IO on a device: +> +> +> +>> void qmp_block_locked_drain(const char *node_name, Error **errp) +> +>> { +> +>> BlockDriverState *bs; +> +>> +> +>> bs = bdrv_find_node(node_name); +> +>> if (!bs) { +> +>> error_setg(errp, "node not found"); +> +>> return; +> +>> } +> +>> +> +>> bdrv_graph_wrlock(); +> +>> bdrv_drained_begin(bs); +> +>> bdrv_drained_end(bs); +> +>> bdrv_graph_wrunlock(); +> +>> } +> +> +> +> It seems like either it would be necessary to require: +> +> 1. not draining inside an exclusively locked section +> +> or +> +> 2. making sure that variables used by drained_poll routines are only set +> +> while holding the reader lock +> +> ? +> +> +> +> Those seem to require rather involved changes, so a third option might +> +> be to make draining inside an exclusively locked section possible, by +> +> embedding such locked sections in a drained section: +> +> +> +>> diff --git a/blockjob.c b/blockjob.c +> +>> index 32007f31a9..9b2f3b3ea9 100644 +> +>> --- a/blockjob.c +> +>> +++ b/blockjob.c +> +>> @@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) +> +>> * one to make sure that such a concurrent access does not attempt +> +>> * to process an already freed BdrvChild. +> +>> */ +> +>> + bdrv_drain_all_begin(); +> +>> bdrv_graph_wrlock(); +> +>> while (job->nodes) { +> +>> GSList *l = job->nodes; +> +>> @@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) +> +>> g_slist_free_1(l); +> +>> } +> +>> bdrv_graph_wrunlock(); +> +>> + bdrv_drain_all_end(); +> +>> } +> +>> +> +>> bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +> +> +> +> This seems to fix the issue at hand. I can send a patch if this is +> +> considered an acceptable approach. +Kevin is aware of this thread but it's a public holiday tomorrow so it +may be a little longer. + +Stefan + +Am 24.04.2025 um 19:32 hat Andrey Drobyshev geschrieben: +> +Hi all, +> +> +There's a bug in block layer which leads to block graph deadlock. +> +Notably, it takes place when blockdev IO is processed within a separate +> +iothread. +> +> +This was initially caught by our tests, and I was able to reduce it to a +> +relatively simple reproducer. 
Such deadlocks are probably supposed to +> +be covered in iotests/graph-changes-while-io, but this deadlock isn't. +> +> +Basically what the reproducer does is launches QEMU with a drive having +> +'iothread' option set, creates a chain of 2 snapshots, launches +> +block-commit job for a snapshot and then dismisses the job, starting +> +from the lower snapshot. If the guest is issuing IO at the same time, +> +there's a race in acquiring block graph lock and a potential deadlock. +> +> +Here's how it can be reproduced: +> +> +1. Run QEMU: +> +> SRCDIR=/path/to/srcdir +> +> +> +> +> +> +> +> +> +> $SRCDIR/build/qemu-system-x86_64 -enable-kvm \ +> +> +> +> -machine q35 -cpu Nehalem \ +> +> +> +> -name guest=alma8-vm,debug-threads=on \ +> +> +> +> -m 2g -smp 2 \ +> +> +> +> -nographic -nodefaults \ +> +> +> +> -qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ +> +> +> +> -serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ +> +> +> +> -object iothread,id=iothread0 \ +> +> +> +> -blockdev +> +> node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 +> +> \ +> +> -device virtio-blk-pci,drive=disk,iothread=iothread0 +> +> +2. Launch IO (random reads) from within the guest: +> +> nc -U /var/run/alma8-serial.sock +> +> ... +> +> [root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 --bs=4k +> +> --size=1G --numjobs=1 --time_based=1 --runtime=300 --group_reporting +> +> --rw=randread --iodepth=1 --filename=/testfile +> +> +3. Run snapshots creation & removal of lower snapshot operation in a +> +loop (script attached): +> +> while /bin/true ; do ./remove_lower_snap.sh ; done +> +> +And then it occasionally hangs. +> +> +Note: I've tried bisecting this, and looks like deadlock occurs starting +> +from the following commit: +> +> +(BAD) 5bdbaebcce virtio: Re-enable notifications after drain +> +(GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll +> +> +On the latest v10.0.0 it does hang as well. 
+> +> +> +Here's backtrace of the main thread: +> +> +> #0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, +> +> timeout=, sigmask=0x0) at +> +> ../sysdeps/unix/sysv/linux/ppoll.c:43 +> +> #1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, +> +> timeout=-1) at ../util/qemu-timer.c:329 +> +> #2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, +> +> ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 +> +> #3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) at +> +> ../util/aio-posix.c:730 +> +> #4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, +> +> parent=0x0, poll=true) at ../block/io.c:378 +> +> #5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at +> +> ../block/io.c:391 +> +> #6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7682 +> +> #7 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7964250, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7608 +> +> #8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7668 +> +> #9 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7e59110, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7608 +> +> #10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7668 +> +> #11 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb814ed80, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7608 +> +> #12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../blockjob.c:157 +> +> #13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb7c9d3f0, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7592 +> +> #14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7661 +> +> #15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx +> +> (child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = +> +> {...}, tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 +> +> #16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb8565af0, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7592 +> +> #17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, +> +> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +> +> errp=0x0) +> +> at ../block.c:7661 +> +> #18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context (bs=0x557eb79575e0, +> +> ctx=0x557eb76c5f20, ignore_child=0x0, errp=0x0) at ../block.c:7715 +> +> #19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) at +> +> ../block.c:3317 +> +> #20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv (job=0x557eb7952800) at +> +> ../blockjob.c:209 +> +> #21 0x0000557eb45ee641 in 
block_job_free (job=0x557eb7952800) at +> +> ../blockjob.c:82 +> +> #22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at +> +> ../job.c:474 +> +> #23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at +> +> ../job.c:771 +> +> #24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, +> +> errp=0x7ffd94b4f488) at ../job.c:783 +> +> --Type for more, q to quit, c to continue without paging-- +> +> #25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 +> +> "commit-snap1", errp=0x7ffd94b4f488) at ../job-qmp.c:138 +> +> #26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, +> +> ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 +> +> #27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at +> +> ../qapi/qmp-dispatch.c:128 +> +> #28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at +> +> ../util/async.c:172 +> +> #29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at +> +> ../util/async.c:219 +> +> #30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at +> +> ../util/aio-posix.c:436 +> +> #31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, +> +> callback=0x0, user_data=0x0) at ../util/async.c:361 +> +> #32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at +> +> ../glib/gmain.c:3364 +> +> #33 g_main_context_dispatch (context=0x557eb76c6430) at ../glib/gmain.c:4079 +> +> #34 0x0000557eb47d3ab1 in glib_pollfds_poll () at ../util/main-loop.c:287 +> +> #35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at +> +> ../util/main-loop.c:310 +> +> #36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at +> +> ../util/main-loop.c:589 +> +> #37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 +> +> #38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at +> +> ../system/main.c:50 +> +> #39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at +> +> ../system/main.c:80 +> +> +> +And here's coroutine trying to acquire read lock: +> +> +> (gdb) qemu coroutine reader_queue->entries.sqh_first +> +> #0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, +> +> to_=0x7fc537fff508, action=COROUTINE_YIELD) at +> +> ../util/coroutine-ucontext.c:321 +> +> #1 0x0000557eb47d4d4a in qemu_coroutine_yield () at +> +> ../util/qemu-coroutine.c:339 +> +> #2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 +> +> , lock=0x7fc53c57de50, flags=0) at +> +> ../util/qemu-coroutine-lock.c:60 +> +> #3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at +> +> ../block/graph-lock.c:231 +> +> #4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) at +> +> /home/root/src/qemu/master/include/block/graph-lock.h:213 +> +> #5 0x0000557eb460fa41 in blk_co_do_preadv_part +> +> (blk=0x557eb84c0810, offset=6890553344, bytes=4096, +> +> qiov=0x7fc530006988, qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at +> +> ../block/block-backend.c:1339 +> +> #6 0x0000557eb46104d7 in blk_aio_read_entry (opaque=0x7fc530003240) at +> +> ../block/block-backend.c:1619 +> +> #7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, i1=21886) +> +> at ../util/coroutine-ucontext.c:175 +> +> #8 0x00007fc547c2a360 in __start_context () at +> +> ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 +> +> #9 0x00007ffd94b4ea40 in () +> +> #10 0x0000000000000000 in () +> +> +> +So it looks like main thread is processing job-dismiss request and is +> +holding write lock taken in block_job_remove_all_bdrv() (frame #20 +> +above). 
At the same time iothread spawns a coroutine which performs IO +> +request. Before the coroutine is spawned, blk_aio_prwv() increases +> +'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is +> +trying to acquire the read lock. But main thread isn't releasing the +> +lock as blk_root_drained_poll() returns true since blk->in_flight > 0. +> +Here's the deadlock. +> +> +Any comments and suggestions on the subject are welcomed. Thanks! +I think this is what the blk_wait_while_drained() call was supposed to +address in blk_co_do_preadv_part(). However, with the use of multiple +I/O threads, this is racy. + +Do you think that in your case we hit the small race window between the +checks in blk_wait_while_drained() and GRAPH_RDLOCK_GUARD()? Or is there +another reason why blk_wait_while_drained() didn't do its job? + +Kevin + +On 5/2/25 19:34, Kevin Wolf wrote: +Am 24.04.2025 um 19:32 hat Andrey Drobyshev geschrieben: +Hi all, + +There's a bug in block layer which leads to block graph deadlock. +Notably, it takes place when blockdev IO is processed within a separate +iothread. + +This was initially caught by our tests, and I was able to reduce it to a +relatively simple reproducer. Such deadlocks are probably supposed to +be covered in iotests/graph-changes-while-io, but this deadlock isn't. + +Basically what the reproducer does is launches QEMU with a drive having +'iothread' option set, creates a chain of 2 snapshots, launches +block-commit job for a snapshot and then dismisses the job, starting +from the lower snapshot. If the guest is issuing IO at the same time, +there's a race in acquiring block graph lock and a potential deadlock. + +Here's how it can be reproduced: + +1. Run QEMU: +SRCDIR=/path/to/srcdir +$SRCDIR/build/qemu-system-x86_64 -enable-kvm \ +-machine q35 -cpu Nehalem \ + -name guest=alma8-vm,debug-threads=on \ + -m 2g -smp 2 \ + -nographic -nodefaults \ + -qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ + -serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ + -object iothread,id=iothread0 \ + -blockdev +node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 + \ + -device virtio-blk-pci,drive=disk,iothread=iothread0 +2. Launch IO (random reads) from within the guest: +nc -U /var/run/alma8-serial.sock +... +[root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 --bs=4k +--size=1G --numjobs=1 --time_based=1 --runtime=300 --group_reporting +--rw=randread --iodepth=1 --filename=/testfile +3. Run snapshots creation & removal of lower snapshot operation in a +loop (script attached): +while /bin/true ; do ./remove_lower_snap.sh ; done +And then it occasionally hangs. + +Note: I've tried bisecting this, and looks like deadlock occurs starting +from the following commit: + +(BAD) 5bdbaebcce virtio: Re-enable notifications after drain +(GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll + +On the latest v10.0.0 it does hang as well. 
+ + +Here's backtrace of the main thread: +#0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, timeout=, sigmask=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:43 +#1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, timeout=-1) +at ../util/qemu-timer.c:329 +#2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, +ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 +#3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) at +../util/aio-posix.c:730 +#4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, parent=0x0, +poll=true) at ../block/io.c:378 +#5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at +../block/io.c:391 +#6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7682 +#7 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7964250, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7608 +#8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7668 +#9 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7e59110, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7608 +#10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7668 +#11 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb814ed80, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7608 +#12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../blockjob.c:157 +#13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb7c9d3f0, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7592 +#14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7661 +#15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx + (child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 +#16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb8565af0, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7592 +#17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, +ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, +errp=0x0) + at ../block.c:7661 +#18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context (bs=0x557eb79575e0, +ctx=0x557eb76c5f20, ignore_child=0x0, errp=0x0) at ../block.c:7715 +#19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) at +../block.c:3317 +#20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv (job=0x557eb7952800) at +../blockjob.c:209 +#21 0x0000557eb45ee641 in block_job_free (job=0x557eb7952800) at +../blockjob.c:82 +#22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at ../job.c:474 +#23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at +../job.c:771 +#24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, +errp=0x7ffd94b4f488) at ../job.c:783 
+--Type for more, q to quit, c to continue without paging-- +#25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 "commit-snap1", +errp=0x7ffd94b4f488) at ../job-qmp.c:138 +#26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, +ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 +#27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at +../qapi/qmp-dispatch.c:128 +#28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at ../util/async.c:172 +#29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at +../util/async.c:219 +#30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at +../util/aio-posix.c:436 +#31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, +callback=0x0, user_data=0x0) at ../util/async.c:361 +#32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at +../glib/gmain.c:3364 +#33 g_main_context_dispatch (context=0x557eb76c6430) at ../glib/gmain.c:4079 +#34 0x0000557eb47d3ab1 in glib_pollfds_poll () at ../util/main-loop.c:287 +#35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at +../util/main-loop.c:310 +#36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at +../util/main-loop.c:589 +#37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 +#38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at ../system/main.c:50 +#39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at +../system/main.c:80 +And here's coroutine trying to acquire read lock: +(gdb) qemu coroutine reader_queue->entries.sqh_first +#0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, +to_=0x7fc537fff508, action=COROUTINE_YIELD) at ../util/coroutine-ucontext.c:321 +#1 0x0000557eb47d4d4a in qemu_coroutine_yield () at +../util/qemu-coroutine.c:339 +#2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 +, lock=0x7fc53c57de50, flags=0) at +../util/qemu-coroutine-lock.c:60 +#3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at ../block/graph-lock.c:231 +#4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) at +/home/root/src/qemu/master/include/block/graph-lock.h:213 +#5 0x0000557eb460fa41 in blk_co_do_preadv_part + (blk=0x557eb84c0810, offset=6890553344, bytes=4096, qiov=0x7fc530006988, +qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at ../block/block-backend.c:1339 +#6 0x0000557eb46104d7 in blk_aio_read_entry (opaque=0x7fc530003240) at +../block/block-backend.c:1619 +#7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, i1=21886) at +../util/coroutine-ucontext.c:175 +#8 0x00007fc547c2a360 in __start_context () at +../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 +#9 0x00007ffd94b4ea40 in () +#10 0x0000000000000000 in () +So it looks like main thread is processing job-dismiss request and is +holding write lock taken in block_job_remove_all_bdrv() (frame #20 +above). At the same time iothread spawns a coroutine which performs IO +request. Before the coroutine is spawned, blk_aio_prwv() increases +'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is +trying to acquire the read lock. But main thread isn't releasing the +lock as blk_root_drained_poll() returns true since blk->in_flight > 0. +Here's the deadlock. + +Any comments and suggestions on the subject are welcomed. Thanks! +I think this is what the blk_wait_while_drained() call was supposed to +address in blk_co_do_preadv_part(). However, with the use of multiple +I/O threads, this is racy. 
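
As a rough illustration of the ordering described above, here is a minimal
standalone model (not QEMU code; the identifiers only stand in for the
primitives discussed in this thread, and the sleep is merely there to make the
interleaving deterministic). It deadlocks by construction in the same way as
the reported trace: the "main thread" holds the write lock and polls for
in_flight to drop, while the "I/O thread" has already raised in_flight and is
blocked waiting for the read lock.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t graph_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int in_flight;

/* Stand-in for the iothread request path: bump in_flight, then take rdlock. */
static void *io_request(void *arg)
{
    (void)arg;
    atomic_fetch_add(&in_flight, 1);      /* like blk_aio_prwv() raising blk->in_flight */
    pthread_rwlock_rdlock(&graph_lock);   /* like GRAPH_RDLOCK_GUARD(): blocks on the writer */
    pthread_rwlock_unlock(&graph_lock);
    atomic_fetch_sub(&in_flight, 1);
    return NULL;
}

int main(void)
{
    pthread_t io;

    pthread_rwlock_wrlock(&graph_lock);   /* like bdrv_graph_wrlock() in the job-dismiss path */
    pthread_create(&io, NULL, io_request, NULL);
    sleep(1);                             /* let the request raise in_flight and block on rdlock */

    /* Stand-in for the drain poll loop: waits for in_flight to drop, which never happens. */
    while (atomic_load(&in_flight) > 0) {
        usleep(1000);
    }

    pthread_rwlock_unlock(&graph_lock);
    pthread_join(io, NULL);
    printf("never reached\n");
    return 0;
}

Built with "cc -pthread", the loop above spins forever, mirroring the drain
poll (aio_poll() in frame #3 of the main-thread backtrace) that never sees
blk->in_flight reach zero.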
+ +Do you think that in your case we hit the small race window between the +checks in blk_wait_while_drained() and GRAPH_RDLOCK_GUARD()? Or is there +another reason why blk_wait_while_drained() didn't do its job? + +Kevin +At my opinion there is very big race window. Main thread has +eaten graph write lock. After that another coroutine is stalled +within GRAPH_RDLOCK_GUARD() as there is no drain at the moment and only +after that main thread has started drain. That is why Fiona's idea is +looking working. Though this would mean that normally we should always +do that at the moment when we acquire write lock. May be even inside +this function. Den + +Am 02.05.2025 um 19:52 hat Denis V. Lunev geschrieben: +> +On 5/2/25 19:34, Kevin Wolf wrote: +> +> Am 24.04.2025 um 19:32 hat Andrey Drobyshev geschrieben: +> +> > Hi all, +> +> > +> +> > There's a bug in block layer which leads to block graph deadlock. +> +> > Notably, it takes place when blockdev IO is processed within a separate +> +> > iothread. +> +> > +> +> > This was initially caught by our tests, and I was able to reduce it to a +> +> > relatively simple reproducer. Such deadlocks are probably supposed to +> +> > be covered in iotests/graph-changes-while-io, but this deadlock isn't. +> +> > +> +> > Basically what the reproducer does is launches QEMU with a drive having +> +> > 'iothread' option set, creates a chain of 2 snapshots, launches +> +> > block-commit job for a snapshot and then dismisses the job, starting +> +> > from the lower snapshot. If the guest is issuing IO at the same time, +> +> > there's a race in acquiring block graph lock and a potential deadlock. +> +> > +> +> > Here's how it can be reproduced: +> +> > +> +> > 1. Run QEMU: +> +> > > SRCDIR=/path/to/srcdir +> +> > > $SRCDIR/build/qemu-system-x86_64 -enable-kvm \ +> +> > > -machine q35 -cpu Nehalem \ +> +> > > -name guest=alma8-vm,debug-threads=on \ +> +> > > -m 2g -smp 2 \ +> +> > > -nographic -nodefaults \ +> +> > > -qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ +> +> > > -serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ +> +> > > -object iothread,id=iothread0 \ +> +> > > -blockdev +> +> > > node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 +> +> > > \ +> +> > > -device virtio-blk-pci,drive=disk,iothread=iothread0 +> +> > 2. Launch IO (random reads) from within the guest: +> +> > > nc -U /var/run/alma8-serial.sock +> +> > > ... +> +> > > [root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 +> +> > > --bs=4k --size=1G --numjobs=1 --time_based=1 --runtime=300 +> +> > > --group_reporting --rw=randread --iodepth=1 --filename=/testfile +> +> > 3. Run snapshots creation & removal of lower snapshot operation in a +> +> > loop (script attached): +> +> > > while /bin/true ; do ./remove_lower_snap.sh ; done +> +> > And then it occasionally hangs. +> +> > +> +> > Note: I've tried bisecting this, and looks like deadlock occurs starting +> +> > from the following commit: +> +> > +> +> > (BAD) 5bdbaebcce virtio: Re-enable notifications after drain +> +> > (GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll +> +> > +> +> > On the latest v10.0.0 it does hang as well. 
+> +> > +> +> > +> +> > Here's backtrace of the main thread: +> +> > +> +> > > #0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, +> +> > > timeout=, sigmask=0x0) at +> +> > > ../sysdeps/unix/sysv/linux/ppoll.c:43 +> +> > > #1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, +> +> > > timeout=-1) at ../util/qemu-timer.c:329 +> +> > > #2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, +> +> > > ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 +> +> > > #3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) +> +> > > at ../util/aio-posix.c:730 +> +> > > #4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, +> +> > > parent=0x0, poll=true) at ../block/io.c:378 +> +> > > #5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at +> +> > > ../block/io.c:391 +> +> > > #6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, +> +> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7682 +> +> > > #7 0x0000557eb45ebf2b in bdrv_child_change_aio_context +> +> > > (c=0x557eb7964250, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7608 +> +> > > #8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, +> +> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7668 +> +> > > #9 0x0000557eb45ebf2b in bdrv_child_change_aio_context +> +> > > (c=0x557eb7e59110, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7608 +> +> > > #10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, +> +> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7668 +> +> > > #11 0x0000557eb45ebf2b in bdrv_child_change_aio_context +> +> > > (c=0x557eb814ed80, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7608 +> +> > > #12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, +> +> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../blockjob.c:157 +> +> > > #13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context +> +> > > (c=0x557eb7c9d3f0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7592 +> +> > > #14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, +> +> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7661 +> +> > > #15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx +> +> > > (child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 +> +> > > = {...}, tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 +> +> > > #16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context +> +> > > (c=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7592 +> +> > > #17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, +> +> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, +> +> > > tran=0x557eb7a87160, errp=0x0) +> +> > > at ../block.c:7661 +> +> > > #18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context +> +> > > (bs=0x557eb79575e0, ctx=0x557eb76c5f20, ignore_child=0x0, 
errp=0x0) at +> +> > > ../block.c:7715 +> +> > > #19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) +> +> > > at ../block.c:3317 +> +> > > #20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv +> +> > > (job=0x557eb7952800) at ../blockjob.c:209 +> +> > > #21 0x0000557eb45ee641 in block_job_free (job=0x557eb7952800) at +> +> > > ../blockjob.c:82 +> +> > > #22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at +> +> > > ../job.c:474 +> +> > > #23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at +> +> > > ../job.c:771 +> +> > > #24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, +> +> > > errp=0x7ffd94b4f488) at ../job.c:783 +> +> > > --Type for more, q to quit, c to continue without paging-- +> +> > > #25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 +> +> > > "commit-snap1", errp=0x7ffd94b4f488) at ../job-qmp.c:138 +> +> > > #26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, +> +> > > ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 +> +> > > #27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at +> +> > > ../qapi/qmp-dispatch.c:128 +> +> > > #28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at +> +> > > ../util/async.c:172 +> +> > > #29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at +> +> > > ../util/async.c:219 +> +> > > #30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at +> +> > > ../util/aio-posix.c:436 +> +> > > #31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, +> +> > > callback=0x0, user_data=0x0) at ../util/async.c:361 +> +> > > #32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at +> +> > > ../glib/gmain.c:3364 +> +> > > #33 g_main_context_dispatch (context=0x557eb76c6430) at +> +> > > ../glib/gmain.c:4079 +> +> > > #34 0x0000557eb47d3ab1 in glib_pollfds_poll () at +> +> > > ../util/main-loop.c:287 +> +> > > #35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at +> +> > > ../util/main-loop.c:310 +> +> > > #36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at +> +> > > ../util/main-loop.c:589 +> +> > > #37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 +> +> > > #38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at +> +> > > ../system/main.c:50 +> +> > > #39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at +> +> > > ../system/main.c:80 +> +> > +> +> > And here's coroutine trying to acquire read lock: +> +> > +> +> > > (gdb) qemu coroutine reader_queue->entries.sqh_first +> +> > > #0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, +> +> > > to_=0x7fc537fff508, action=COROUTINE_YIELD) at +> +> > > ../util/coroutine-ucontext.c:321 +> +> > > #1 0x0000557eb47d4d4a in qemu_coroutine_yield () at +> +> > > ../util/qemu-coroutine.c:339 +> +> > > #2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 +> +> > > , lock=0x7fc53c57de50, flags=0) at +> +> > > ../util/qemu-coroutine-lock.c:60 +> +> > > #3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at +> +> > > ../block/graph-lock.c:231 +> +> > > #4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) +> +> > > at /home/root/src/qemu/master/include/block/graph-lock.h:213 +> +> > > #5 0x0000557eb460fa41 in blk_co_do_preadv_part +> +> > > (blk=0x557eb84c0810, offset=6890553344, bytes=4096, +> +> > > qiov=0x7fc530006988, qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at +> +> > > ../block/block-backend.c:1339 +> +> > > #6 0x0000557eb46104d7 in 
blk_aio_read_entry (opaque=0x7fc530003240) at +> +> > > ../block/block-backend.c:1619 +> +> > > #7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, +> +> > > i1=21886) at ../util/coroutine-ucontext.c:175 +> +> > > #8 0x00007fc547c2a360 in __start_context () at +> +> > > ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 +> +> > > #9 0x00007ffd94b4ea40 in () +> +> > > #10 0x0000000000000000 in () +> +> > +> +> > So it looks like main thread is processing job-dismiss request and is +> +> > holding write lock taken in block_job_remove_all_bdrv() (frame #20 +> +> > above). At the same time iothread spawns a coroutine which performs IO +> +> > request. Before the coroutine is spawned, blk_aio_prwv() increases +> +> > 'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is +> +> > trying to acquire the read lock. But main thread isn't releasing the +> +> > lock as blk_root_drained_poll() returns true since blk->in_flight > 0. +> +> > Here's the deadlock. +> +> > +> +> > Any comments and suggestions on the subject are welcomed. Thanks! +> +> I think this is what the blk_wait_while_drained() call was supposed to +> +> address in blk_co_do_preadv_part(). However, with the use of multiple +> +> I/O threads, this is racy. +> +> +> +> Do you think that in your case we hit the small race window between the +> +> checks in blk_wait_while_drained() and GRAPH_RDLOCK_GUARD()? Or is there +> +> another reason why blk_wait_while_drained() didn't do its job? +> +> +> +At my opinion there is very big race window. Main thread has +> +eaten graph write lock. After that another coroutine is stalled +> +within GRAPH_RDLOCK_GUARD() as there is no drain at the moment and only +> +after that main thread has started drain. +You're right, I confused taking the write lock with draining there. + +> +That is why Fiona's idea is looking working. Though this would mean +> +that normally we should always do that at the moment when we acquire +> +write lock. May be even inside this function. +I actually see now that not all of my graph locking patches were merged. +At least I did have the thought that bdrv_drained_begin() must be marked +GRAPH_UNLOCKED because it polls. That means that calling it from inside +bdrv_try_change_aio_context() is actually forbidden (and that's the part +I didn't see back then because it doesn't have TSA annotations). + +If you refactor the code to move the drain out to before the lock is +taken, I think you end up with Fiona's patch, except you'll remove the +forbidden inner drain and add more annotations for some functions and +clarify the rules around them. I don't know, but I wouldn't be surprised +if along the process we find other bugs, too. + +So Fiona's drain looks right to me, but we should probably approach it +more systematically. + +Kevin + diff --git a/classification_output/01/instruction/26095107 b/classification_output/01/instruction/26095107 new file mode 100644 index 000000000..c06d35dd8 --- /dev/null +++ b/classification_output/01/instruction/26095107 @@ -0,0 +1,158 @@ +instruction: 0.991 +other: 0.979 +semantic: 0.974 +mistranslation: 0.930 + +[Qemu-devel] [Bug Report] vm paused after succeeding to migrate + +Hi, all +I encounterd a bug when I try to migrate a windows vm. + +Enviroment information: +host A: cpu E5620(model WestmereEP without flag xsave) +host B: cpu E5-2643(model SandyBridgeEP with xsave) + +The reproduce steps is : +1. Start a windows 2008 vm with -cpu host(which means host-passthrough). +2. 
Migrate the vm to host B when cr4.OSXSAVE=0 (successfully). +3. Vm runs on host B for a while so that cr4.OSXSAVE changes to 1. +4. Then migrate the vm to host A (successfully), but vm was paused, and qemu +printed log as followed: + +KVM: entry failed, hardware error 0x80000021 + +If you're running a guest on an Intel machine without unrestricted mode +support, the failure can be most likely due to the guest entering an invalid +state for Intel VT. For example, the guest maybe running in big real mode +which is not supported on less recent Intel processors. + +EAX=019b3bb0 EBX=01a3ae80 ECX=01a61ce8 EDX=00000000 +ESI=01a62000 EDI=00000000 EBP=00000000 ESP=01718b20 +EIP=0185d982 EFL=00000286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 +ES =0000 00000000 0000ffff 00009300 +CS =f000 ffff0000 0000ffff 00009b00 +SS =0000 00000000 0000ffff 00009300 +DS =0000 00000000 0000ffff 00009300 +FS =0000 00000000 0000ffff 00009300 +GS =0000 00000000 0000ffff 00009300 +LDT=0000 00000000 0000ffff 00008200 +TR =0000 00000000 0000ffff 00008b00 +GDT= 00000000 0000ffff +IDT= 00000000 0000ffff +CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 +DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 +DR3=0000000000000000 +DR6=00000000ffff0ff0 DR7=0000000000000400 +EFER=0000000000000000 +Code=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <00> 00 00 00 +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + +I have found that problem happened when kvm_put_sregs returns err -22(called by +kvm_arch_put_registers(qemu)). +Because kvm_arch_vcpu_ioctl_set_sregs(kvm-mod) checked that guest_cpuid_has no +X86_FEATURE_XSAVE but cr4.OSXSAVE=1. +So should we cancel migration when kvm_arch_put_registers returns error? + +* linzhecheng (address@hidden) wrote: +> +Hi, all +> +I encounterd a bug when I try to migrate a windows vm. +> +> +Enviroment information: +> +host A: cpu E5620(model WestmereEP without flag xsave) +> +host B: cpu E5-2643(model SandyBridgeEP with xsave) +> +> +The reproduce steps is : +> +1. Start a windows 2008 vm with -cpu host(which means host-passthrough). +> +2. Migrate the vm to host B when cr4.OSXSAVE=0 (successfully). +> +3. Vm runs on host B for a while so that cr4.OSXSAVE changes to 1. +> +4. Then migrate the vm to host A (successfully), but vm was paused, and qemu +> +printed log as followed: +Remember that migrating using -cpu host across different CPU models is NOT +expected to work. + +> +KVM: entry failed, hardware error 0x80000021 +> +> +If you're running a guest on an Intel machine without unrestricted mode +> +support, the failure can be most likely due to the guest entering an invalid +> +state for Intel VT. For example, the guest maybe running in big real mode +> +which is not supported on less recent Intel processors. 
+> +> +EAX=019b3bb0 EBX=01a3ae80 ECX=01a61ce8 EDX=00000000 +> +ESI=01a62000 EDI=00000000 EBP=00000000 ESP=01718b20 +> +EIP=0185d982 EFL=00000286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 +> +ES =0000 00000000 0000ffff 00009300 +> +CS =f000 ffff0000 0000ffff 00009b00 +> +SS =0000 00000000 0000ffff 00009300 +> +DS =0000 00000000 0000ffff 00009300 +> +FS =0000 00000000 0000ffff 00009300 +> +GS =0000 00000000 0000ffff 00009300 +> +LDT=0000 00000000 0000ffff 00008200 +> +TR =0000 00000000 0000ffff 00008b00 +> +GDT= 00000000 0000ffff +> +IDT= 00000000 0000ffff +> +CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 +> +DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 +> +DR3=0000000000000000 +> +DR6=00000000ffff0ff0 DR7=0000000000000400 +> +EFER=0000000000000000 +> +Code=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <00> 00 00 +> +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +> +00 +> +> +I have found that problem happened when kvm_put_sregs returns err -22(called +> +by kvm_arch_put_registers(qemu)). +> +Because kvm_arch_vcpu_ioctl_set_sregs(kvm-mod) checked that guest_cpuid_has +> +no X86_FEATURE_XSAVE but cr4.OSXSAVE=1. +> +So should we cancel migration when kvm_arch_put_registers returns error? +It would seem good if we can make the migration fail there rather than +hitting that KVM error. +It looks like we need to do a bit of plumbing to convert the places that +call it to return a bool rather than void. + +Dave + +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + diff --git a/classification_output/01/instruction/2609717 b/classification_output/01/instruction/2609717 deleted file mode 100644 index b8e563ad9..000000000 --- a/classification_output/01/instruction/2609717 +++ /dev/null @@ -1,4939 +0,0 @@ -instruction: 0.693 -mistranslation: 0.687 -semantic: 0.656 -other: 0.637 - -[BUG] cxl can not create region - -Hi list - -I want to test cxl functions in arm64, and found some problems I can't -figure out. - -My test environment: - -1. build latest bios from -https://github.com/tianocore/edk2.git -master -branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) -2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git -master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm -support patch: -https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ -3. build Linux kernel from -https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git -preview -branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) -4. 
build latest ndctl tools from -https://github.com/pmem/ndctl -create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) - -And my qemu test commands: -sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ - -cpu max -smp 8 -nographic -no-reboot \ - -kernel $KERNEL -bios $BIOS_BIN \ - -drive if=none,file=$ROOTFS,format=qcow2,id=hd \ - -device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 -nokaslr dyndbg="module cxl* +p"' \ - -object memory-backend-ram,size=4G,id=mem0 \ - -numa node,nodeid=0,cpus=0-7,memdev=mem0 \ - -net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ - -object -memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M -\ - -object -memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M -\ - -object -memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M -\ - -object -memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M -\ - -object -memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M -\ - -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ - -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ - -device cxl-upstream,bus=root_port0,id=us0 \ - -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ - -device -cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ - -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ - -device -cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ - -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ - -device -cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ - -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ - -device -cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ - -M -cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k - -And I have got two problems. -1. When I want to create x1 region with command: "cxl create-region -d -decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer -reference. 
Crash log: - -[ 534.697324] cxl_region region0: config state: 0 -[ 534.697346] cxl_region region0: probe: -6 -[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 -[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -for mem0:decoder3.0 @ 0 -[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 -[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = -0000:0e:00.0 for mem0:decoder3.0 @ 0 -[ 534.699405] Unable to handle kernel NULL pointer dereference at -virtual address 0000000000000000 -[ 534.701474] Mem abort info: -[ 534.701994] ESR = 0x0000000086000004 -[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits -[ 534.703616] SET = 0, FnV = 0 -[ 534.704174] EA = 0, S1PTW = 0 -[ 534.704803] FSC = 0x04: level 0 translation fault -[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 -[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 -[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP -[ 534.710301] Modules linked in: -[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted -5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 -[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 -[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -[ 534.719190] pc : 0x0 -[ 534.719928] lr : commit_store+0x118/0x2cc -[ 534.721007] sp : ffff80000aec3c30 -[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: ffff0000c0c06b30 -[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: ffff0000c0a29400 -[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: ffff0000c0c06800 -[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: 0000000000000000 -[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffd41fe838 -[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 -[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 -[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff0000c0906e80 -[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff80000aec3bf0 -[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c155a000 -[ 534.738878] Call trace: -[ 534.739368] 0x0 -[ 534.739713] dev_attr_store+0x1c/0x30 -[ 534.740186] sysfs_kf_write+0x48/0x58 -[ 534.740961] kernfs_fop_write_iter+0x128/0x184 -[ 534.741872] new_sync_write+0xdc/0x158 -[ 534.742706] vfs_write+0x1ac/0x2a8 -[ 534.743440] ksys_write+0x68/0xf0 -[ 534.744328] __arm64_sys_write+0x1c/0x28 -[ 534.745180] invoke_syscall+0x44/0xf0 -[ 534.745989] el0_svc_common+0x4c/0xfc -[ 534.746661] do_el0_svc+0x60/0xa8 -[ 534.747378] el0_svc+0x2c/0x78 -[ 534.748066] el0t_64_sync_handler+0xb8/0x12c -[ 534.748919] el0t_64_sync+0x18c/0x190 -[ 534.749629] Code: bad PC value -[ 534.750169] ---[ end trace 0000000000000000 ]--- - -2. When I want to create x4 region with command: "cxl create-region -d -decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: - -cxl region: create_region: region0: failed to set target3 to mem3 -cxl region: cmd_create_region: created 0 regions - -And kernel log as below: -[ 60.536663] cxl_region region0: config state: 0 -[ 60.536675] cxl_region region0: probe: -6 -[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 -[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: -mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 -[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 -[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 -[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: -mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 -[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 -[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 -[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: -mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 -[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 -[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: -mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 -[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -for mem0:decoder3.0 @ 0 -[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 -[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = -0000:0e:00.0 for mem0:decoder3.0 @ 0 -[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at 1 - -I have tried to write sysfs node manually, got same errors. - -Hope I can get some helps here. - -Bob - -On Fri, 5 Aug 2022 10:20:23 +0800 -Bobo WL wrote: - -> -Hi list -> -> -I want to test cxl functions in arm64, and found some problems I can't -> -figure out. -Hi Bob, - -Glad to see people testing this code. - -> -> -My test environment: -> -> -1. build latest bios from -https://github.com/tianocore/edk2.git -master -> -branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) -> -2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git -> -master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm -> -support patch: -> -https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ -> -3. build Linux kernel from -> -https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git -preview -> -branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) -> -4. 
build latest ndctl tools from -https://github.com/pmem/ndctl -> -create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) -> -> -And my qemu test commands: -> -sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ -> --cpu max -smp 8 -nographic -no-reboot \ -> --kernel $KERNEL -bios $BIOS_BIN \ -> --drive if=none,file=$ROOTFS,format=qcow2,id=hd \ -> --device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 -> -nokaslr dyndbg="module cxl* +p"' \ -> --object memory-backend-ram,size=4G,id=mem0 \ -> --numa node,nodeid=0,cpus=0-7,memdev=mem0 \ -> --net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ -> --object -> -memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M -> -\ -> --device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -> --device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ -Probably not related to your problem, but there is a disconnect in QEMU / -kernel assumptionsaround the presence of an HDM decoder when a HB only -has a single root port. Spec allows it to be provided or not as an -implementation choice. -Kernel assumes it isn't provide. Qemu assumes it is. - -The temporary solution is to throw in a second root port on the HB and not -connect anything to it. Longer term I may special case this so that the -particular -decoder defaults to pass through settings in QEMU if there is only one root -port. - -> --device cxl-upstream,bus=root_port0,id=us0 \ -> --device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ -> --device -> -cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ -> --device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ -> --device -> -cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ -> --device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ -> --device -> -cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ -> --device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ -> --device -> -cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ -> --M -> -cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k -> -> -And I have got two problems. -> -1. When I want to create x1 region with command: "cxl create-region -d -> -decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer -> -reference. Crash log: -> -> -[ 534.697324] cxl_region region0: config state: 0 -> -[ 534.697346] cxl_region region0: probe: -6 -Seems odd this is up here. But maybe fine. 
- -> -[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 -> -[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: -> -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -> -[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -> -[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -> -[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -> -[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -> -for mem0:decoder3.0 @ 0 -> -[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 -> -[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = -> -0000:0e:00.0 for mem0:decoder3.0 @ 0 -> -[ 534.699405] Unable to handle kernel NULL pointer dereference at -> -virtual address 0000000000000000 -> -[ 534.701474] Mem abort info: -> -[ 534.701994] ESR = 0x0000000086000004 -> -[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits -> -[ 534.703616] SET = 0, FnV = 0 -> -[ 534.704174] EA = 0, S1PTW = 0 -> -[ 534.704803] FSC = 0x04: level 0 translation fault -> -[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 -> -[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 -> -[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP -> -[ 534.710301] Modules linked in: -> -[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted -> -5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 -> -[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 -> -[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -> -[ 534.719190] pc : 0x0 -> -[ 534.719928] lr : commit_store+0x118/0x2cc -> -[ 534.721007] sp : ffff80000aec3c30 -> -[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: -> -ffff0000c0c06b30 -> -[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: -> -ffff0000c0a29400 -> -[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: -> -ffff0000c0c06800 -> -[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: -> -0000000000000000 -> -[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: -> -0000ffffd41fe838 -> -[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: -> -0000000000000000 -> -[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : -> -0000000000000000 -> -[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : -> -ffff0000c0906e80 -> -[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : -> -ffff80000aec3bf0 -> -[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : -> -ffff0000c155a000 -> -[ 534.738878] Call trace: -> -[ 534.739368] 0x0 -> -[ 534.739713] dev_attr_store+0x1c/0x30 -> -[ 534.740186] sysfs_kf_write+0x48/0x58 -> -[ 534.740961] kernfs_fop_write_iter+0x128/0x184 -> -[ 534.741872] new_sync_write+0xdc/0x158 -> -[ 534.742706] vfs_write+0x1ac/0x2a8 -> -[ 534.743440] ksys_write+0x68/0xf0 -> -[ 534.744328] __arm64_sys_write+0x1c/0x28 -> -[ 534.745180] invoke_syscall+0x44/0xf0 -> -[ 534.745989] el0_svc_common+0x4c/0xfc -> -[ 534.746661] do_el0_svc+0x60/0xa8 -> -[ 534.747378] el0_svc+0x2c/0x78 -> -[ 534.748066] el0t_64_sync_handler+0xb8/0x12c -> -[ 534.748919] el0t_64_sync+0x18c/0x190 -> -[ 534.749629] Code: bad PC value -> -[ 534.750169] ---[ end trace 0000000000000000 ]--- -> -> -2. When I want to create x4 region with command: "cxl create-region -d -> -decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: -> -> -cxl region: create_region: region0: failed to set target3 to mem3 -> -cxl region: cmd_create_region: created 0 regions -> -> -And kernel log as below: -> -[ 60.536663] cxl_region region0: config state: 0 -> -[ 60.536675] cxl_region region0: probe: -6 -> -[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 -> -[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: -> -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -> -[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -> -[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: -> -mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 -> -[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 -> -[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: -> -mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 -> -[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 -> -[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: -> -mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 -> -[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 -> -[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -> -[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -> -for mem0:decoder3.0 @ 0 -> -[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 -This looks like off by 1 that should be fixed in the below mentioned -cxl/pending branch. That ig should be 256. Note the fix was -for a test case with a fat HB and no switch, but certainly looks -like this is the same issue. - -> -[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = -> -0000:0e:00.0 for mem0:decoder3.0 @ 0 -> -[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at -> -1 -> -> -I have tried to write sysfs node manually, got same errors. -When stepping through by hand, which sysfs write triggers the crash above? - -Not sure it's related, but I've just sent out a fix to the -target register handling in QEMU. -20220808122051.14822-1-Jonathan.Cameron@huawei.com -/T/#m47ff985412ce44559e6b04d677c302f8cd371330">https://lore.kernel.org/linux-cxl/ -20220808122051.14822-1-Jonathan.Cameron@huawei.com -/T/#m47ff985412ce44559e6b04d677c302f8cd371330 -I did have one instance last week of triggering what looked to be a race -condition but -the stack trace doesn't looks related to what you've hit. - -It will probably be a few days before I have time to take a look at replicating -what you have seen. - -If you have time, try using the kernel.org cxl/pending branch as there are -a few additional fixes on there since you sent this email. Optimistic to hope -this is covered by one of those, but at least it will mean we are trying to -replicate -on same branch. - -Jonathan - - -> -> -Hope I can get some helps here. 
-> -> -Bob - -Hi Jonathan - -Thanks for your reply! - -On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron - wrote: -> -> -Probably not related to your problem, but there is a disconnect in QEMU / -> -kernel assumptionsaround the presence of an HDM decoder when a HB only -> -has a single root port. Spec allows it to be provided or not as an -> -implementation choice. -> -Kernel assumes it isn't provide. Qemu assumes it is. -> -> -The temporary solution is to throw in a second root port on the HB and not -> -connect anything to it. Longer term I may special case this so that the -> -particular -> -decoder defaults to pass through settings in QEMU if there is only one root -> -port. -> -You are right! After adding an extra HB in qemu, I can create a x1 -region successfully. -But have some errors in Nvdimm: - -[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node 0 -[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node 0 -[ 74.927470] nd_region region0: nmem0: is disabled, failing probe - -And x4 region still failed with same errors, using latest cxl/preview -branch don't work. -I have picked "Two CXL emulation fixes" patches in qemu, still not working. - -Bob - -On Tue, 9 Aug 2022 21:07:06 +0800 -Bobo WL wrote: - -> -Hi Jonathan -> -> -Thanks for your reply! -> -> -On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> - wrote: -> -> -> -> Probably not related to your problem, but there is a disconnect in QEMU / -> -> kernel assumptionsaround the presence of an HDM decoder when a HB only -> -> has a single root port. Spec allows it to be provided or not as an -> -> implementation choice. -> -> Kernel assumes it isn't provide. Qemu assumes it is. -> -> -> -> The temporary solution is to throw in a second root port on the HB and not -> -> connect anything to it. Longer term I may special case this so that the -> -> particular -> -> decoder defaults to pass through settings in QEMU if there is only one root -> -> port. -> -> -> -> -You are right! After adding an extra HB in qemu, I can create a x1 -> -region successfully. -> -But have some errors in Nvdimm: -> -> -[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node > 0 -> -[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node > 0 -> -[ 74.927470] nd_region region0: nmem0: is disabled, failing probe -Ah. I've seen this one, but not chased it down yet. Was on my todo list to -chase -down. Once I reach this state I can verify the HDM Decode is correct which is -what -I've been using to test (Which wasn't true until earlier this week). -I'm currently testing via devmem, more for historical reasons than because it -makes -that much sense anymore. - -> -> -And x4 region still failed with same errors, using latest cxl/preview -> -branch don't work. -> -I have picked "Two CXL emulation fixes" patches in qemu, still not working. -> -> -Bob - -On Tue, 9 Aug 2022 17:08:25 +0100 -Jonathan Cameron wrote: - -> -On Tue, 9 Aug 2022 21:07:06 +0800 -> -Bobo WL wrote: -> -> -> Hi Jonathan -> -> -> -> Thanks for your reply! -> -> -> -> On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> wrote: -> -> > -> -> > Probably not related to your problem, but there is a disconnect in QEMU / -> -> > kernel assumptionsaround the presence of an HDM decoder when a HB only -> -> > has a single root port. Spec allows it to be provided or not as an -> -> > implementation choice. -> -> > Kernel assumes it isn't provide. Qemu assumes it is. 
-> -> > -> -> > The temporary solution is to throw in a second root port on the HB and not -> -> > connect anything to it. Longer term I may special case this so that the -> -> > particular -> -> > decoder defaults to pass through settings in QEMU if there is only one -> -> > root port. -> -> > -> -> -> -> You are right! After adding an extra HB in qemu, I can create a x1 -> -> region successfully. -> -> But have some errors in Nvdimm: -> -> -> -> [ 74.925838] Unknown online node for memory at 0x10000000000, assuming -> -> node 0 -> -> [ 74.925846] Unknown target node for memory at 0x10000000000, assuming -> -> node 0 -> -> [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> -Ah. I've seen this one, but not chased it down yet. Was on my todo list to -> -chase -> -down. Once I reach this state I can verify the HDM Decode is correct which is -> -what -> -I've been using to test (Which wasn't true until earlier this week). -> -I'm currently testing via devmem, more for historical reasons than because it -> -makes -> -that much sense anymore. -*embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -I'd forgotten that was still on the todo list. I don't think it will -be particularly hard to do and will take a look in next few days. - -Very very indirectly this error is causing a driver probe fail that means that -we hit a code path that has a rather odd looking check on NDD_LABELING. -Should not have gotten near that path though - hence the problem is actually -when we call cxl_pmem_get_config_data() and it returns an error because -we haven't fully connected up the command in QEMU. - -Jonathan - - -> -> -> -> -> And x4 region still failed with same errors, using latest cxl/preview -> -> branch don't work. -> -> I have picked "Two CXL emulation fixes" patches in qemu, still not working. -> -> -> -> Bob - -On Thu, 11 Aug 2022 18:08:57 +0100 -Jonathan Cameron via wrote: - -> -On Tue, 9 Aug 2022 17:08:25 +0100 -> -Jonathan Cameron wrote: -> -> -> On Tue, 9 Aug 2022 21:07:06 +0800 -> -> Bobo WL wrote: -> -> -> -> > Hi Jonathan -> -> > -> -> > Thanks for your reply! -> -> > -> -> > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > wrote: -> -> > > -> -> > > Probably not related to your problem, but there is a disconnect in QEMU -> -> > > / -> -> > > kernel assumptionsaround the presence of an HDM decoder when a HB only -> -> > > has a single root port. Spec allows it to be provided or not as an -> -> > > implementation choice. -> -> > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > -> -> > > The temporary solution is to throw in a second root port on the HB and -> -> > > not -> -> > > connect anything to it. Longer term I may special case this so that -> -> > > the particular -> -> > > decoder defaults to pass through settings in QEMU if there is only one -> -> > > root port. -> -> > > -> -> > -> -> > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > region successfully. -> -> > But have some errors in Nvdimm: -> -> > -> -> > [ 74.925838] Unknown online node for memory at 0x10000000000, assuming -> -> > node 0 -> -> > [ 74.925846] Unknown target node for memory at 0x10000000000, assuming -> -> > node 0 -> -> > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> -> -> Ah. I've seen this one, but not chased it down yet. Was on my todo list to -> -> chase -> -> down. 
Once I reach this state I can verify the HDM Decode is correct which -> -> is what -> -> I've been using to test (Which wasn't true until earlier this week). -> -> I'm currently testing via devmem, more for historical reasons than because -> -> it makes -> -> that much sense anymore. -> -> -*embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -I'd forgotten that was still on the todo list. I don't think it will -> -be particularly hard to do and will take a look in next few days. -> -> -Very very indirectly this error is causing a driver probe fail that means that -> -we hit a code path that has a rather odd looking check on NDD_LABELING. -> -Should not have gotten near that path though - hence the problem is actually -> -when we call cxl_pmem_get_config_data() and it returns an error because -> -we haven't fully connected up the command in QEMU. -So a least one bug in QEMU. We were not supporting variable length payloads on -mailbox -inputs (but were on outputs). That hasn't mattered until we get to LSA writes. -We just need to relax condition on the supplied length. - -diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -index c352a935c4..fdda9529fe 100644 ---- a/hw/cxl/cxl-mailbox-utils.c -+++ b/hw/cxl/cxl-mailbox-utils.c -@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) - cxl_cmd = &cxl_cmd_set[set][cmd]; - h = cxl_cmd->handler; - if (h) { -- if (len == cxl_cmd->in) { -+ if (len == cxl_cmd->in || !cxl_cmd->in) { - cxl_cmd->payload = cxl_dstate->mbox_reg_state + - A_CXL_DEV_CMD_PAYLOAD; - ret = (*h)(cxl_cmd, cxl_dstate, &len); - - -This lets the nvdimm/region probe fine, but I'm getting some issues with -namespace capacity so I'll look at what is causing that next. -Unfortunately I'm not that familiar with the driver/nvdimm side of things -so it's take a while to figure out what kicks off what! - -Jonathan - -> -> -Jonathan -> -> -> -> -> -> > -> -> > And x4 region still failed with same errors, using latest cxl/preview -> -> > branch don't work. -> -> > I have picked "Two CXL emulation fixes" patches in qemu, still not -> -> > working. -> -> > -> -> > Bob -> -> - -Jonathan Cameron wrote: -> -On Thu, 11 Aug 2022 18:08:57 +0100 -> -Jonathan Cameron via wrote: -> -> -> On Tue, 9 Aug 2022 17:08:25 +0100 -> -> Jonathan Cameron wrote: -> -> -> -> > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > Bobo WL wrote: -> -> > -> -> > > Hi Jonathan -> -> > > -> -> > > Thanks for your reply! -> -> > > -> -> > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > wrote: -> -> > > > -> -> > > > Probably not related to your problem, but there is a disconnect in -> -> > > > QEMU / -> -> > > > kernel assumptionsaround the presence of an HDM decoder when a HB only -> -> > > > has a single root port. Spec allows it to be provided or not as an -> -> > > > implementation choice. -> -> > > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > > -> -> > > > The temporary solution is to throw in a second root port on the HB -> -> > > > and not -> -> > > > connect anything to it. Longer term I may special case this so that -> -> > > > the particular -> -> > > > decoder defaults to pass through settings in QEMU if there is only -> -> > > > one root port. -> -> > > > -> -> > > -> -> > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > region successfully. 
-> -> > > But have some errors in Nvdimm: -> -> > > -> -> > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > assuming node 0 -> -> > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > assuming node 0 -> -> > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> > -> -> > Ah. I've seen this one, but not chased it down yet. Was on my todo list -> -> > to chase -> -> > down. Once I reach this state I can verify the HDM Decode is correct -> -> > which is what -> -> > I've been using to test (Which wasn't true until earlier this week). -> -> > I'm currently testing via devmem, more for historical reasons than -> -> > because it makes -> -> > that much sense anymore. -> -> -> -> *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> I'd forgotten that was still on the todo list. I don't think it will -> -> be particularly hard to do and will take a look in next few days. -> -> -> -> Very very indirectly this error is causing a driver probe fail that means -> -> that -> -> we hit a code path that has a rather odd looking check on NDD_LABELING. -> -> Should not have gotten near that path though - hence the problem is actually -> -> when we call cxl_pmem_get_config_data() and it returns an error because -> -> we haven't fully connected up the command in QEMU. -> -> -So a least one bug in QEMU. We were not supporting variable length payloads -> -on mailbox -> -inputs (but were on outputs). That hasn't mattered until we get to LSA -> -writes. -> -We just need to relax condition on the supplied length. -> -> -diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -index c352a935c4..fdda9529fe 100644 -> ---- a/hw/cxl/cxl-mailbox-utils.c -> -+++ b/hw/cxl/cxl-mailbox-utils.c -> -@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -cxl_cmd = &cxl_cmd_set[set][cmd]; -> -h = cxl_cmd->handler; -> -if (h) { -> -- if (len == cxl_cmd->in) { -> -+ if (len == cxl_cmd->in || !cxl_cmd->in) { -> -cxl_cmd->payload = cxl_dstate->mbox_reg_state + -> -A_CXL_DEV_CMD_PAYLOAD; -> -ret = (*h)(cxl_cmd, cxl_dstate, &len); -> -> -> -This lets the nvdimm/region probe fine, but I'm getting some issues with -> -namespace capacity so I'll look at what is causing that next. -> -Unfortunately I'm not that familiar with the driver/nvdimm side of things -> -so it's take a while to figure out what kicks off what! -The whirlwind tour is that 'struct nd_region' instances that represent a -persitent memory address range are composed of one more mappings of -'struct nvdimm' objects. The nvdimm object is driven by the dimm driver -in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking -the dimm (if locked) and interrogating the label area to look for -namespace labels. - -The label command calls are routed to the '->ndctl()' callback that was -registered when the CXL nvdimm_bus_descriptor was created. That callback -handles both 'bus' scope calls, currently none for CXL, and per nvdimm -calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands -to CXL commands. - -The 'struct nvdimm' objects that the CXL side registers have the -NDD_LABELING flag set which means that namespaces need to be explicitly -created / provisioned from region capacity. Otherwise, if -drivers/nvdimm/dimm.c does not find a namespace-label-index block then -the region reverts to label-less mode and a default namespace equal to -the size of the region is instantiated. 
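As a rough sketch of that '->ndctl()' routing (illustrative only, not the
actual drivers/cxl/pmem.c code; cxl_lsa_cmd() below is a hypothetical
stand-in for the real CXL mailbox helpers), the callback LIBNVDIMM invokes
for label commands looks something like:

#include <linux/libnvdimm.h>
#include <linux/ndctl.h>

/* Hypothetical stand-in: forward a label-area command to the device mailbox. */
static int cxl_lsa_cmd(struct nvdimm *nvdimm, unsigned int cmd,
                       void *buf, unsigned int buf_len, int *cmd_rc)
{
        return -ENXIO;  /* the real driver issues Get/Set LSA mailbox commands here */
}

static int example_nvdimm_ctl(struct nvdimm_bus_descriptor *nd_desc,
                              struct nvdimm *nvdimm, unsigned int cmd,
                              void *buf, unsigned int buf_len, int *cmd_rc)
{
        if (!nvdimm)
                return -ENOTTY; /* no bus-scope commands in this sketch */

        switch (cmd) {
        case ND_CMD_GET_CONFIG_SIZE:    /* report LSA size and max transfer */
        case ND_CMD_GET_CONFIG_DATA:    /* read a chunk of the label area */
        case ND_CMD_SET_CONFIG_DATA:    /* write a chunk of the label area */
                return cxl_lsa_cmd(nvdimm, cmd, buf, buf_len, cmd_rc);
        default:
                return -ENOTTY;
        }
}

static struct nvdimm_bus_descriptor example_nd_desc = {
        .provider_name  = "cxl-example",
        .ndctl          = example_nvdimm_ctl,
};

The 'struct nvdimm' registered against a descriptor like this carries the
NDD_LABELING flag, which is what forces the explicit provisioning behaviour
described above rather than label-less mode.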
- -If you are seeing small mismatches in namespace capacity then it may -just be the fact that by default 'ndctl create-namespace' results in an -'fsdax' mode namespace which just means that it is a block device where -1.5% of the capacity is reserved for 'struct page' metadata. You should -be able to see namespace capacity == region capacity by doing "ndctl -create-namespace -m raw", and disable DAX operation. - -Hope that helps. - -On Fri, 12 Aug 2022 09:03:02 -0700 -Dan Williams wrote: - -> -Jonathan Cameron wrote: -> -> On Thu, 11 Aug 2022 18:08:57 +0100 -> -> Jonathan Cameron via wrote: -> -> -> -> > On Tue, 9 Aug 2022 17:08:25 +0100 -> -> > Jonathan Cameron wrote: -> -> > -> -> > > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > > Bobo WL wrote: -> -> > > -> -> > > > Hi Jonathan -> -> > > > -> -> > > > Thanks for your reply! -> -> > > > -> -> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > > wrote: -> -> > > > > -> -> > > > > Probably not related to your problem, but there is a disconnect in -> -> > > > > QEMU / -> -> > > > > kernel assumptionsaround the presence of an HDM decoder when a HB -> -> > > > > only -> -> > > > > has a single root port. Spec allows it to be provided or not as an -> -> > > > > implementation choice. -> -> > > > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > > > -> -> > > > > The temporary solution is to throw in a second root port on the HB -> -> > > > > and not -> -> > > > > connect anything to it. Longer term I may special case this so -> -> > > > > that the particular -> -> > > > > decoder defaults to pass through settings in QEMU if there is only -> -> > > > > one root port. -> -> > > > > -> -> > > > -> -> > > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > > region successfully. -> -> > > > But have some errors in Nvdimm: -> -> > > > -> -> > > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > > assuming node 0 -> -> > > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > > assuming node 0 -> -> > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> > > > -> -> > > -> -> > > Ah. I've seen this one, but not chased it down yet. Was on my todo -> -> > > list to chase -> -> > > down. Once I reach this state I can verify the HDM Decode is correct -> -> > > which is what -> -> > > I've been using to test (Which wasn't true until earlier this week). -> -> > > I'm currently testing via devmem, more for historical reasons than -> -> > > because it makes -> -> > > that much sense anymore. -> -> > -> -> > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> > I'd forgotten that was still on the todo list. I don't think it will -> -> > be particularly hard to do and will take a look in next few days. -> -> > -> -> > Very very indirectly this error is causing a driver probe fail that means -> -> > that -> -> > we hit a code path that has a rather odd looking check on NDD_LABELING. -> -> > Should not have gotten near that path though - hence the problem is -> -> > actually -> -> > when we call cxl_pmem_get_config_data() and it returns an error because -> -> > we haven't fully connected up the command in QEMU. -> -> -> -> So a least one bug in QEMU. We were not supporting variable length payloads -> -> on mailbox -> -> inputs (but were on outputs). That hasn't mattered until we get to LSA -> -> writes. -> -> We just need to relax condition on the supplied length. 
-> -> -> -> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -> index c352a935c4..fdda9529fe 100644 -> -> --- a/hw/cxl/cxl-mailbox-utils.c -> -> +++ b/hw/cxl/cxl-mailbox-utils.c -> -> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -> cxl_cmd = &cxl_cmd_set[set][cmd]; -> -> h = cxl_cmd->handler; -> -> if (h) { -> -> - if (len == cxl_cmd->in) { -> -> + if (len == cxl_cmd->in || !cxl_cmd->in) { -> -> cxl_cmd->payload = cxl_dstate->mbox_reg_state + -> -> A_CXL_DEV_CMD_PAYLOAD; -> -> ret = (*h)(cxl_cmd, cxl_dstate, &len); -> -> -> -> -> -> This lets the nvdimm/region probe fine, but I'm getting some issues with -> -> namespace capacity so I'll look at what is causing that next. -> -> Unfortunately I'm not that familiar with the driver/nvdimm side of things -> -> so it's take a while to figure out what kicks off what! -> -> -The whirlwind tour is that 'struct nd_region' instances that represent a -> -persitent memory address range are composed of one more mappings of -> -'struct nvdimm' objects. The nvdimm object is driven by the dimm driver -> -in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking -> -the dimm (if locked) and interrogating the label area to look for -> -namespace labels. -> -> -The label command calls are routed to the '->ndctl()' callback that was -> -registered when the CXL nvdimm_bus_descriptor was created. That callback -> -handles both 'bus' scope calls, currently none for CXL, and per nvdimm -> -calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands -> -to CXL commands. -> -> -The 'struct nvdimm' objects that the CXL side registers have the -> -NDD_LABELING flag set which means that namespaces need to be explicitly -> -created / provisioned from region capacity. Otherwise, if -> -drivers/nvdimm/dimm.c does not find a namespace-label-index block then -> -the region reverts to label-less mode and a default namespace equal to -> -the size of the region is instantiated. -> -> -If you are seeing small mismatches in namespace capacity then it may -> -just be the fact that by default 'ndctl create-namespace' results in an -> -'fsdax' mode namespace which just means that it is a block device where -> -1.5% of the capacity is reserved for 'struct page' metadata. You should -> -be able to see namespace capacity == region capacity by doing "ndctl -> -create-namespace -m raw", and disable DAX operation. -Currently ndctl create-namespace crashes qemu ;) -Which isn't ideal! - -> -> -Hope that helps. -Got me looking at the right code. Thanks! - -Jonathan - -On Fri, 12 Aug 2022 17:15:09 +0100 -Jonathan Cameron wrote: - -> -On Fri, 12 Aug 2022 09:03:02 -0700 -> -Dan Williams wrote: -> -> -> Jonathan Cameron wrote: -> -> > On Thu, 11 Aug 2022 18:08:57 +0100 -> -> > Jonathan Cameron via wrote: -> -> > -> -> > > On Tue, 9 Aug 2022 17:08:25 +0100 -> -> > > Jonathan Cameron wrote: -> -> > > -> -> > > > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > > > Bobo WL wrote: -> -> > > > -> -> > > > > Hi Jonathan -> -> > > > > -> -> > > > > Thanks for your reply! -> -> > > > > -> -> > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > > > wrote: -> -> > > > > > -> -> > > > > > Probably not related to your problem, but there is a disconnect -> -> > > > > > in QEMU / -> -> > > > > > kernel assumptionsaround the presence of an HDM decoder when a HB -> -> > > > > > only -> -> > > > > > has a single root port. Spec allows it to be provided or not as -> -> > > > > > an implementation choice. 
-> -> > > > > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > > > > -> -> > > > > > The temporary solution is to throw in a second root port on the -> -> > > > > > HB and not -> -> > > > > > connect anything to it. Longer term I may special case this so -> -> > > > > > that the particular -> -> > > > > > decoder defaults to pass through settings in QEMU if there is -> -> > > > > > only one root port. -> -> > > > > > -> -> > > > > -> -> > > > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > > > region successfully. -> -> > > > > But have some errors in Nvdimm: -> -> > > > > -> -> > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > > > assuming node 0 -> -> > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > > > assuming node 0 -> -> > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> > > > > -> -> > > > -> -> > > > Ah. I've seen this one, but not chased it down yet. Was on my todo -> -> > > > list to chase -> -> > > > down. Once I reach this state I can verify the HDM Decode is correct -> -> > > > which is what -> -> > > > I've been using to test (Which wasn't true until earlier this week). -> -> > > > I'm currently testing via devmem, more for historical reasons than -> -> > > > because it makes -> -> > > > that much sense anymore. -> -> > > -> -> > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> > > I'd forgotten that was still on the todo list. I don't think it will -> -> > > be particularly hard to do and will take a look in next few days. -> -> > > -> -> > > Very very indirectly this error is causing a driver probe fail that -> -> > > means that -> -> > > we hit a code path that has a rather odd looking check on NDD_LABELING. -> -> > > Should not have gotten near that path though - hence the problem is -> -> > > actually -> -> > > when we call cxl_pmem_get_config_data() and it returns an error because -> -> > > we haven't fully connected up the command in QEMU. -> -> > -> -> > So a least one bug in QEMU. We were not supporting variable length -> -> > payloads on mailbox -> -> > inputs (but were on outputs). That hasn't mattered until we get to LSA -> -> > writes. -> -> > We just need to relax condition on the supplied length. -> -> > -> -> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -> > index c352a935c4..fdda9529fe 100644 -> -> > --- a/hw/cxl/cxl-mailbox-utils.c -> -> > +++ b/hw/cxl/cxl-mailbox-utils.c -> -> > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -> > cxl_cmd = &cxl_cmd_set[set][cmd]; -> -> > h = cxl_cmd->handler; -> -> > if (h) { -> -> > - if (len == cxl_cmd->in) { -> -> > + if (len == cxl_cmd->in || !cxl_cmd->in) { -> -> > cxl_cmd->payload = cxl_dstate->mbox_reg_state + -> -> > A_CXL_DEV_CMD_PAYLOAD; -> -> > ret = (*h)(cxl_cmd, cxl_dstate, &len); -> -> > -> -> > -> -> > This lets the nvdimm/region probe fine, but I'm getting some issues with -> -> > namespace capacity so I'll look at what is causing that next. -> -> > Unfortunately I'm not that familiar with the driver/nvdimm side of things -> -> > so it's take a while to figure out what kicks off what! -> -> -> -> The whirlwind tour is that 'struct nd_region' instances that represent a -> -> persitent memory address range are composed of one more mappings of -> -> 'struct nvdimm' objects. The nvdimm object is driven by the dimm driver -> -> in drivers/nvdimm/dimm.c. 
That driver is mainly charged with unlocking -> -> the dimm (if locked) and interrogating the label area to look for -> -> namespace labels. -> -> -> -> The label command calls are routed to the '->ndctl()' callback that was -> -> registered when the CXL nvdimm_bus_descriptor was created. That callback -> -> handles both 'bus' scope calls, currently none for CXL, and per nvdimm -> -> calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands -> -> to CXL commands. -> -> -> -> The 'struct nvdimm' objects that the CXL side registers have the -> -> NDD_LABELING flag set which means that namespaces need to be explicitly -> -> created / provisioned from region capacity. Otherwise, if -> -> drivers/nvdimm/dimm.c does not find a namespace-label-index block then -> -> the region reverts to label-less mode and a default namespace equal to -> -> the size of the region is instantiated. -> -> -> -> If you are seeing small mismatches in namespace capacity then it may -> -> just be the fact that by default 'ndctl create-namespace' results in an -> -> 'fsdax' mode namespace which just means that it is a block device where -> -> 1.5% of the capacity is reserved for 'struct page' metadata. You should -> -> be able to see namespace capacity == region capacity by doing "ndctl -> -> create-namespace -m raw", and disable DAX operation. -> -> -Currently ndctl create-namespace crashes qemu ;) -> -Which isn't ideal! -> -Found a cause for this one. Mailbox payload may be as small as 256 bytes. -We have code in kernel sanity checking that output payload fits in the -mailbox, but nothing on the input payload. Symptom is that we write just -off the end whatever size the payload is. Note doing this shouldn't crash -qemu - so I need to fix a range check somewhere. - -I think this is because cxl_pmem_get_config_size() returns the mailbox -payload size as being the available LSA size, forgetting to remove the -size of the headers on the set_lsa side of things. -https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110 -I've hacked the max_payload to be -8 - -Now we still don't succeed in creating the namespace, but bonus is it doesn't -crash any more. - - -Jonathan - - - -> -> -> -> Hope that helps. -> -Got me looking at the right code. Thanks! -> -> -Jonathan -> -> - -On Mon, 15 Aug 2022 15:18:09 +0100 -Jonathan Cameron via wrote: - -> -On Fri, 12 Aug 2022 17:15:09 +0100 -> -Jonathan Cameron wrote: -> -> -> On Fri, 12 Aug 2022 09:03:02 -0700 -> -> Dan Williams wrote: -> -> -> -> > Jonathan Cameron wrote: -> -> > > On Thu, 11 Aug 2022 18:08:57 +0100 -> -> > > Jonathan Cameron via wrote: -> -> > > -> -> > > > On Tue, 9 Aug 2022 17:08:25 +0100 -> -> > > > Jonathan Cameron wrote: -> -> > > > -> -> > > > > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > > > > Bobo WL wrote: -> -> > > > > -> -> > > > > > Hi Jonathan -> -> > > > > > -> -> > > > > > Thanks for your reply! -> -> > > > > > -> -> > > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > > > > wrote: -> -> > > > > > > -> -> > > > > > > Probably not related to your problem, but there is a disconnect -> -> > > > > > > in QEMU / -> -> > > > > > > kernel assumptionsaround the presence of an HDM decoder when a -> -> > > > > > > HB only -> -> > > > > > > has a single root port. Spec allows it to be provided or not as -> -> > > > > > > an implementation choice. -> -> > > > > > > Kernel assumes it isn't provide. Qemu assumes it is. 
-> -> > > > > > > -> -> > > > > > > The temporary solution is to throw in a second root port on the -> -> > > > > > > HB and not -> -> > > > > > > connect anything to it. Longer term I may special case this so -> -> > > > > > > that the particular -> -> > > > > > > decoder defaults to pass through settings in QEMU if there is -> -> > > > > > > only one root port. -> -> > > > > > > -> -> > > > > > -> -> > > > > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > > > > region successfully. -> -> > > > > > But have some errors in Nvdimm: -> -> > > > > > -> -> > > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > > > > assuming node 0 -> -> > > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > > > > assuming node 0 -> -> > > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing -> -> > > > > > probe -> -> > > > > -> -> > > > > Ah. I've seen this one, but not chased it down yet. Was on my todo -> -> > > > > list to chase -> -> > > > > down. Once I reach this state I can verify the HDM Decode is -> -> > > > > correct which is what -> -> > > > > I've been using to test (Which wasn't true until earlier this -> -> > > > > week). -> -> > > > > I'm currently testing via devmem, more for historical reasons than -> -> > > > > because it makes -> -> > > > > that much sense anymore. -> -> > > > -> -> > > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> > > > I'd forgotten that was still on the todo list. I don't think it will -> -> > > > be particularly hard to do and will take a look in next few days. -> -> > > > -> -> > > > Very very indirectly this error is causing a driver probe fail that -> -> > > > means that -> -> > > > we hit a code path that has a rather odd looking check on -> -> > > > NDD_LABELING. -> -> > > > Should not have gotten near that path though - hence the problem is -> -> > > > actually -> -> > > > when we call cxl_pmem_get_config_data() and it returns an error -> -> > > > because -> -> > > > we haven't fully connected up the command in QEMU. -> -> > > -> -> > > So a least one bug in QEMU. We were not supporting variable length -> -> > > payloads on mailbox -> -> > > inputs (but were on outputs). That hasn't mattered until we get to LSA -> -> > > writes. -> -> > > We just need to relax condition on the supplied length. -> -> > > -> -> > > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -> > > index c352a935c4..fdda9529fe 100644 -> -> > > --- a/hw/cxl/cxl-mailbox-utils.c -> -> > > +++ b/hw/cxl/cxl-mailbox-utils.c -> -> > > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -> > > cxl_cmd = &cxl_cmd_set[set][cmd]; -> -> > > h = cxl_cmd->handler; -> -> > > if (h) { -> -> > > - if (len == cxl_cmd->in) { -> -> > > + if (len == cxl_cmd->in || !cxl_cmd->in) { -> -> > > cxl_cmd->payload = cxl_dstate->mbox_reg_state + -> -> > > A_CXL_DEV_CMD_PAYLOAD; -> -> > > ret = (*h)(cxl_cmd, cxl_dstate, &len); -> -> > > -> -> > > -> -> > > This lets the nvdimm/region probe fine, but I'm getting some issues with -> -> > > namespace capacity so I'll look at what is causing that next. -> -> > > Unfortunately I'm not that familiar with the driver/nvdimm side of -> -> > > things -> -> > > so it's take a while to figure out what kicks off what! -> -> > -> -> > The whirlwind tour is that 'struct nd_region' instances that represent a -> -> > persitent memory address range are composed of one more mappings of -> -> > 'struct nvdimm' objects. 
The nvdimm object is driven by the dimm driver -> -> > in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking -> -> > the dimm (if locked) and interrogating the label area to look for -> -> > namespace labels. -> -> > -> -> > The label command calls are routed to the '->ndctl()' callback that was -> -> > registered when the CXL nvdimm_bus_descriptor was created. That callback -> -> > handles both 'bus' scope calls, currently none for CXL, and per nvdimm -> -> > calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands -> -> > to CXL commands. -> -> > -> -> > The 'struct nvdimm' objects that the CXL side registers have the -> -> > NDD_LABELING flag set which means that namespaces need to be explicitly -> -> > created / provisioned from region capacity. Otherwise, if -> -> > drivers/nvdimm/dimm.c does not find a namespace-label-index block then -> -> > the region reverts to label-less mode and a default namespace equal to -> -> > the size of the region is instantiated. -> -> > -> -> > If you are seeing small mismatches in namespace capacity then it may -> -> > just be the fact that by default 'ndctl create-namespace' results in an -> -> > 'fsdax' mode namespace which just means that it is a block device where -> -> > 1.5% of the capacity is reserved for 'struct page' metadata. You should -> -> > be able to see namespace capacity == region capacity by doing "ndctl -> -> > create-namespace -m raw", and disable DAX operation. -> -> -> -> Currently ndctl create-namespace crashes qemu ;) -> -> Which isn't ideal! -> -> -> -> -Found a cause for this one. Mailbox payload may be as small as 256 bytes. -> -We have code in kernel sanity checking that output payload fits in the -> -mailbox, but nothing on the input payload. Symptom is that we write just -> -off the end whatever size the payload is. Note doing this shouldn't crash -> -qemu - so I need to fix a range check somewhere. -> -> -I think this is because cxl_pmem_get_config_size() returns the mailbox -> -payload size as being the available LSA size, forgetting to remove the -> -size of the headers on the set_lsa side of things. -> -https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110 -> -> -I've hacked the max_payload to be -8 -> -> -Now we still don't succeed in creating the namespace, but bonus is it doesn't -> -crash any more. -In the interests of defensive / correct handling from QEMU I took a -look into why it was crashing. Turns out that providing a NULL write callback -for -the memory device region (that the above overlarge write was spilling into) -isn't -a safe thing to do. Needs a stub. Oops. - -On plus side we might never have noticed this was going wrong without the crash -*silver lining in every cloud* - -Fix to follow... - -Jonathan - - -> -> -> -Jonathan -> -> -> -> -> > -> -> > Hope that helps. -> -> Got me looking at the right code. Thanks! -> -> -> -> Jonathan -> -> -> -> -> -> - -On Mon, 15 Aug 2022 at 15:55, Jonathan Cameron via wrote: -> -In the interests of defensive / correct handling from QEMU I took a -> -look into why it was crashing. Turns out that providing a NULL write -> -callback for -> -the memory device region (that the above overlarge write was spilling into) -> -isn't -> -a safe thing to do. Needs a stub. Oops. -Yeah. We've talked before about adding an assert so that that kind of -"missing function" bug is caught at device creation rather than only -if the guest tries to access the device, but we never quite got around -to it... 
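For reference, the stub mentioned above amounts to something like the
following (a minimal sketch against QEMU's MemoryRegionOps API; the names
are illustrative, not the actual fix that was posted):

#include "qemu/osdep.h"
#include "exec/memory.h"

static uint64_t cxl_example_read(void *opaque, hwaddr addr, unsigned size)
{
    return 0;   /* illustrative read handler */
}

/*
 * Stub write handler: silently discard guest writes.  Leaving .write NULL
 * means an out-of-range or stray write gets dispatched through a NULL
 * function pointer, which is the kind of crash seen above.
 */
static void cxl_example_write(void *opaque, hwaddr addr, uint64_t data,
                              unsigned size)
{
}

static const MemoryRegionOps cxl_example_ops = {
    .read = cxl_example_read,
    .write = cxl_example_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

A region initialised with memory_region_init_io() and ops like these then
swallows such writes instead of faulting; an assert at device creation, as
suggested, would catch the missing callback even earlier.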
- --- PMM - -On Fri, 12 Aug 2022 16:44:03 +0100 -Jonathan Cameron wrote: - -> -On Thu, 11 Aug 2022 18:08:57 +0100 -> -Jonathan Cameron via wrote: -> -> -> On Tue, 9 Aug 2022 17:08:25 +0100 -> -> Jonathan Cameron wrote: -> -> -> -> > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > Bobo WL wrote: -> -> > -> -> > > Hi Jonathan -> -> > > -> -> > > Thanks for your reply! -> -> > > -> -> > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > wrote: -> -> > > > -> -> > > > Probably not related to your problem, but there is a disconnect in -> -> > > > QEMU / -> -> > > > kernel assumptionsaround the presence of an HDM decoder when a HB only -> -> > > > has a single root port. Spec allows it to be provided or not as an -> -> > > > implementation choice. -> -> > > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > > -> -> > > > The temporary solution is to throw in a second root port on the HB -> -> > > > and not -> -> > > > connect anything to it. Longer term I may special case this so that -> -> > > > the particular -> -> > > > decoder defaults to pass through settings in QEMU if there is only -> -> > > > one root port. -> -> > > > -> -> > > -> -> > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > region successfully. -> -> > > But have some errors in Nvdimm: -> -> > > -> -> > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > assuming node 0 -> -> > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > assuming node 0 -> -> > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> > > -> -> > -> -> > Ah. I've seen this one, but not chased it down yet. Was on my todo list -> -> > to chase -> -> > down. Once I reach this state I can verify the HDM Decode is correct -> -> > which is what -> -> > I've been using to test (Which wasn't true until earlier this week). -> -> > I'm currently testing via devmem, more for historical reasons than -> -> > because it makes -> -> > that much sense anymore. -> -> -> -> *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> I'd forgotten that was still on the todo list. I don't think it will -> -> be particularly hard to do and will take a look in next few days. -> -> -> -> Very very indirectly this error is causing a driver probe fail that means -> -> that -> -> we hit a code path that has a rather odd looking check on NDD_LABELING. -> -> Should not have gotten near that path though - hence the problem is actually -> -> when we call cxl_pmem_get_config_data() and it returns an error because -> -> we haven't fully connected up the command in QEMU. -> -> -So a least one bug in QEMU. We were not supporting variable length payloads -> -on mailbox -> -inputs (but were on outputs). That hasn't mattered until we get to LSA -> -writes. -> -We just need to relax condition on the supplied length. -> -> -diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -index c352a935c4..fdda9529fe 100644 -> ---- a/hw/cxl/cxl-mailbox-utils.c -> -+++ b/hw/cxl/cxl-mailbox-utils.c -> -@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -cxl_cmd = &cxl_cmd_set[set][cmd]; -> -h = cxl_cmd->handler; -> -if (h) { -> -- if (len == cxl_cmd->in) { -> -+ if (len == cxl_cmd->in || !cxl_cmd->in) { -Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. - -With that fixed we hit new fun paths - after some errors we get the -worrying - not totally sure but looks like a failure on an error cleanup. 
-I'll chase down the error source, but even then this is probably triggerable by -hardware problem or similar. Some bonus prints in here from me chasing -error paths, but it's otherwise just cxl/next + the fix I posted earlier today. - -[ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) -[ 69.920108] nd_region_probe -[ 69.920623] ------------[ cut here ]------------ -[ 69.920675] refcount_t: addition on 0; use-after-free. -[ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 -refcount_warn_saturate+0xa0/0x144 -[ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi -cxl_core -[ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399 -[ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 -[ 69.931482] Workqueue: events_unbound async_run_entry_fn -[ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -[ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 -[ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 -[ 69.936541] sp : ffff80000890b960 -[ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: 0000000000000000 -[ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: 0000000000000000 -[ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: ffff0000c5254800 -[ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: ffffffffffffffff -[ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: 0000000000000000 -[ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: 657466612d657375 -[ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : ffffa54a8f63d288 -[ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : 00000000fffff31e -[ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : ffff5ab66e5ef000 -root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : -0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 -[ 69.957098] Call trace: -[ 69.957959] refcount_warn_saturate+0xa0/0x144 -[ 69.958773] get_ndd+0x5c/0x80 -[ 69.959294] nd_region_register_namespaces+0xe4/0xe90 -[ 69.960253] nd_region_probe+0x100/0x290 -[ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 -[ 69.962087] really_probe+0x19c/0x3f0 -[ 69.962620] __driver_probe_device+0x11c/0x190 -[ 69.963258] driver_probe_device+0x44/0xf4 -[ 69.963773] __device_attach_driver+0xa4/0x140 -[ 69.964471] bus_for_each_drv+0x84/0xe0 -[ 69.965068] __device_attach+0xb0/0x1f0 -[ 69.966101] device_initial_probe+0x20/0x30 -[ 69.967142] bus_probe_device+0xa4/0xb0 -[ 69.968104] device_add+0x3e8/0x910 -[ 69.969111] nd_async_device_register+0x24/0x74 -[ 69.969928] async_run_entry_fn+0x40/0x150 -[ 69.970725] process_one_work+0x1dc/0x450 -[ 69.971796] worker_thread+0x154/0x450 -[ 69.972700] kthread+0x118/0x120 -[ 69.974141] ret_from_fork+0x10/0x20 -[ 69.975141] ---[ end trace 0000000000000000 ]--- -[ 70.117887] Into nd_namespace_pmem_set_resource() - -> -cxl_cmd->payload = cxl_dstate->mbox_reg_state + -> -A_CXL_DEV_CMD_PAYLOAD; -> -ret = (*h)(cxl_cmd, cxl_dstate, &len); -> -> -> -This lets the nvdimm/region probe fine, but I'm getting some issues with -> -namespace capacity so I'll look at what is causing that next. -> -Unfortunately I'm not that familiar with the driver/nvdimm side of things -> -so it's take a while to figure out what kicks off what! -> -> -Jonathan -> -> -> -> -> Jonathan -> -> -> -> -> -> > -> -> > > -> -> > > And x4 region still failed with same errors, using latest cxl/preview -> -> > > branch don't work. 
-> -> > > I have picked "Two CXL emulation fixes" patches in qemu, still not -> -> > > working. -> -> > > -> -> > > Bob -> -> -> -> -> - -On Mon, 15 Aug 2022 18:04:44 +0100 -Jonathan Cameron wrote: - -> -On Fri, 12 Aug 2022 16:44:03 +0100 -> -Jonathan Cameron wrote: -> -> -> On Thu, 11 Aug 2022 18:08:57 +0100 -> -> Jonathan Cameron via wrote: -> -> -> -> > On Tue, 9 Aug 2022 17:08:25 +0100 -> -> > Jonathan Cameron wrote: -> -> > -> -> > > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > > Bobo WL wrote: -> -> > > -> -> > > > Hi Jonathan -> -> > > > -> -> > > > Thanks for your reply! -> -> > > > -> -> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > > wrote: -> -> > > > > -> -> > > > > Probably not related to your problem, but there is a disconnect in -> -> > > > > QEMU / -> -> > > > > kernel assumptionsaround the presence of an HDM decoder when a HB -> -> > > > > only -> -> > > > > has a single root port. Spec allows it to be provided or not as an -> -> > > > > implementation choice. -> -> > > > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > > > -> -> > > > > The temporary solution is to throw in a second root port on the HB -> -> > > > > and not -> -> > > > > connect anything to it. Longer term I may special case this so -> -> > > > > that the particular -> -> > > > > decoder defaults to pass through settings in QEMU if there is only -> -> > > > > one root port. -> -> > > > > -> -> > > > -> -> > > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > > region successfully. -> -> > > > But have some errors in Nvdimm: -> -> > > > -> -> > > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > > assuming node 0 -> -> > > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > > assuming node 0 -> -> > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> > > > -> -> > > -> -> > > Ah. I've seen this one, but not chased it down yet. Was on my todo -> -> > > list to chase -> -> > > down. Once I reach this state I can verify the HDM Decode is correct -> -> > > which is what -> -> > > I've been using to test (Which wasn't true until earlier this week). -> -> > > I'm currently testing via devmem, more for historical reasons than -> -> > > because it makes -> -> > > that much sense anymore. -> -> > -> -> > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> > I'd forgotten that was still on the todo list. I don't think it will -> -> > be particularly hard to do and will take a look in next few days. -> -> > -> -> > Very very indirectly this error is causing a driver probe fail that means -> -> > that -> -> > we hit a code path that has a rather odd looking check on NDD_LABELING. -> -> > Should not have gotten near that path though - hence the problem is -> -> > actually -> -> > when we call cxl_pmem_get_config_data() and it returns an error because -> -> > we haven't fully connected up the command in QEMU. -> -> -> -> So a least one bug in QEMU. We were not supporting variable length payloads -> -> on mailbox -> -> inputs (but were on outputs). That hasn't mattered until we get to LSA -> -> writes. -> -> We just need to relax condition on the supplied length. 
-> -> -> -> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -> index c352a935c4..fdda9529fe 100644 -> -> --- a/hw/cxl/cxl-mailbox-utils.c -> -> +++ b/hw/cxl/cxl-mailbox-utils.c -> -> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -> cxl_cmd = &cxl_cmd_set[set][cmd]; -> -> h = cxl_cmd->handler; -> -> if (h) { -> -> - if (len == cxl_cmd->in) { -> -> + if (len == cxl_cmd->in || !cxl_cmd->in) { -> -Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. -Cause of the error is a failure in GET_LSA. -Reason, payload length is wrong in QEMU but was hidden previously by my wrong -fix here. Probably still a good idea to inject an error in GET_LSA and chase -down the refcount issue. - - -diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -index fdda9529fe..e8565fbd6e 100644 ---- a/hw/cxl/cxl-mailbox-utils.c -+++ b/hw/cxl/cxl-mailbox-utils.c -@@ -489,7 +489,7 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { - cmd_identify_memory_device, 0, 0 }, - [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO", - cmd_ccls_get_partition_info, 0, 0 }, -- [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 }, -+ [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, - [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, - ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, - [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST", -@@ -510,12 +510,13 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) - cxl_cmd = &cxl_cmd_set[set][cmd]; - h = cxl_cmd->handler; - if (h) { -- if (len == cxl_cmd->in || !cxl_cmd->in) { -+ if (len == cxl_cmd->in || cxl_cmd->in == ~0) { - cxl_cmd->payload = cxl_dstate->mbox_reg_state + - A_CXL_DEV_CMD_PAYLOAD; - -And woot, we get a namespace in the LSA :) - -I'll post QEMU fixes in next day or two. Kernel side now seems more or less -fine be it with suspicious refcount underflow. - -> -> -With that fixed we hit new fun paths - after some errors we get the -> -worrying - not totally sure but looks like a failure on an error cleanup. -> -I'll chase down the error source, but even then this is probably triggerable -> -by -> -hardware problem or similar. Some bonus prints in here from me chasing -> -error paths, but it's otherwise just cxl/next + the fix I posted earlier -> -today. -> -> -[ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) -> -[ 69.920108] nd_region_probe -> -[ 69.920623] ------------[ cut here ]------------ -> -[ 69.920675] refcount_t: addition on 0; use-after-free. 
-> -[ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 -> -refcount_warn_saturate+0xa0/0x144 -> -[ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi -> -cxl_core -> -[ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399 -> -[ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 -> -[ 69.931482] Workqueue: events_unbound async_run_entry_fn -> -[ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -> -[ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 -> -[ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 -> -[ 69.936541] sp : ffff80000890b960 -> -[ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: -> -0000000000000000 -> -[ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: -> -0000000000000000 -> -[ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: -> -ffff0000c5254800 -> -[ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: -> -ffffffffffffffff -> -[ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: -> -0000000000000000 -> -[ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: -> -657466612d657375 -> -[ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : -> -ffffa54a8f63d288 -> -[ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : -> -00000000fffff31e -> -[ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : -> -ffff5ab66e5ef000 -> -root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : -> -0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 -> -[ 69.957098] Call trace: -> -[ 69.957959] refcount_warn_saturate+0xa0/0x144 -> -[ 69.958773] get_ndd+0x5c/0x80 -> -[ 69.959294] nd_region_register_namespaces+0xe4/0xe90 -> -[ 69.960253] nd_region_probe+0x100/0x290 -> -[ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 -> -[ 69.962087] really_probe+0x19c/0x3f0 -> -[ 69.962620] __driver_probe_device+0x11c/0x190 -> -[ 69.963258] driver_probe_device+0x44/0xf4 -> -[ 69.963773] __device_attach_driver+0xa4/0x140 -> -[ 69.964471] bus_for_each_drv+0x84/0xe0 -> -[ 69.965068] __device_attach+0xb0/0x1f0 -> -[ 69.966101] device_initial_probe+0x20/0x30 -> -[ 69.967142] bus_probe_device+0xa4/0xb0 -> -[ 69.968104] device_add+0x3e8/0x910 -> -[ 69.969111] nd_async_device_register+0x24/0x74 -> -[ 69.969928] async_run_entry_fn+0x40/0x150 -> -[ 69.970725] process_one_work+0x1dc/0x450 -> -[ 69.971796] worker_thread+0x154/0x450 -> -[ 69.972700] kthread+0x118/0x120 -> -[ 69.974141] ret_from_fork+0x10/0x20 -> -[ 69.975141] ---[ end trace 0000000000000000 ]--- -> -[ 70.117887] Into nd_namespace_pmem_set_resource() -> -> -> cxl_cmd->payload = cxl_dstate->mbox_reg_state + -> -> A_CXL_DEV_CMD_PAYLOAD; -> -> ret = (*h)(cxl_cmd, cxl_dstate, &len); -> -> -> -> -> -> This lets the nvdimm/region probe fine, but I'm getting some issues with -> -> namespace capacity so I'll look at what is causing that next. -> -> Unfortunately I'm not that familiar with the driver/nvdimm side of things -> -> so it's take a while to figure out what kicks off what! -> -> -> -> Jonathan -> -> -> -> > -> -> > Jonathan -> -> > -> -> > -> -> > > -> -> > > > -> -> > > > And x4 region still failed with same errors, using latest cxl/preview -> -> > > > branch don't work. -> -> > > > I have picked "Two CXL emulation fixes" patches in qemu, still not -> -> > > > working. 
-> -> > > > -> -> > > > Bob -> -> > -> -> > -> -> -> - -Jonathan Cameron wrote: -> -On Fri, 12 Aug 2022 16:44:03 +0100 -> -Jonathan Cameron wrote: -> -> -> On Thu, 11 Aug 2022 18:08:57 +0100 -> -> Jonathan Cameron via wrote: -> -> -> -> > On Tue, 9 Aug 2022 17:08:25 +0100 -> -> > Jonathan Cameron wrote: -> -> > -> -> > > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > > Bobo WL wrote: -> -> > > -> -> > > > Hi Jonathan -> -> > > > -> -> > > > Thanks for your reply! -> -> > > > -> -> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > > wrote: -> -> > > > > -> -> > > > > Probably not related to your problem, but there is a disconnect in -> -> > > > > QEMU / -> -> > > > > kernel assumptionsaround the presence of an HDM decoder when a HB -> -> > > > > only -> -> > > > > has a single root port. Spec allows it to be provided or not as an -> -> > > > > implementation choice. -> -> > > > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > > > -> -> > > > > The temporary solution is to throw in a second root port on the HB -> -> > > > > and not -> -> > > > > connect anything to it. Longer term I may special case this so -> -> > > > > that the particular -> -> > > > > decoder defaults to pass through settings in QEMU if there is only -> -> > > > > one root port. -> -> > > > > -> -> > > > -> -> > > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > > region successfully. -> -> > > > But have some errors in Nvdimm: -> -> > > > -> -> > > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > > assuming node 0 -> -> > > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > > assuming node 0 -> -> > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> > > > -> -> > > -> -> > > Ah. I've seen this one, but not chased it down yet. Was on my todo -> -> > > list to chase -> -> > > down. Once I reach this state I can verify the HDM Decode is correct -> -> > > which is what -> -> > > I've been using to test (Which wasn't true until earlier this week). -> -> > > I'm currently testing via devmem, more for historical reasons than -> -> > > because it makes -> -> > > that much sense anymore. -> -> > -> -> > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> > I'd forgotten that was still on the todo list. I don't think it will -> -> > be particularly hard to do and will take a look in next few days. -> -> > -> -> > Very very indirectly this error is causing a driver probe fail that means -> -> > that -> -> > we hit a code path that has a rather odd looking check on NDD_LABELING. -> -> > Should not have gotten near that path though - hence the problem is -> -> > actually -> -> > when we call cxl_pmem_get_config_data() and it returns an error because -> -> > we haven't fully connected up the command in QEMU. -> -> -> -> So a least one bug in QEMU. We were not supporting variable length payloads -> -> on mailbox -> -> inputs (but were on outputs). That hasn't mattered until we get to LSA -> -> writes. -> -> We just need to relax condition on the supplied length. 
-> -> -> -> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -> index c352a935c4..fdda9529fe 100644 -> -> --- a/hw/cxl/cxl-mailbox-utils.c -> -> +++ b/hw/cxl/cxl-mailbox-utils.c -> -> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -> cxl_cmd = &cxl_cmd_set[set][cmd]; -> -> h = cxl_cmd->handler; -> -> if (h) { -> -> - if (len == cxl_cmd->in) { -> -> + if (len == cxl_cmd->in || !cxl_cmd->in) { -> -Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. -> -> -With that fixed we hit new fun paths - after some errors we get the -> -worrying - not totally sure but looks like a failure on an error cleanup. -> -I'll chase down the error source, but even then this is probably triggerable -> -by -> -hardware problem or similar. Some bonus prints in here from me chasing -> -error paths, but it's otherwise just cxl/next + the fix I posted earlier -> -today. -One of the scenarios that I cannot rule out is nvdimm_probe() racing -nd_region_probe(), but given all the work it takes to create a region I -suspect all the nvdimm_probe() work to have completed... - -It is at least one potentially wrong hypothesis that needs to be chased -down. - -> -> -[ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) -> -[ 69.920108] nd_region_probe -> -[ 69.920623] ------------[ cut here ]------------ -> -[ 69.920675] refcount_t: addition on 0; use-after-free. -> -[ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 -> -refcount_warn_saturate+0xa0/0x144 -> -[ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi -> -cxl_core -> -[ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399 -> -[ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 -> -[ 69.931482] Workqueue: events_unbound async_run_entry_fn -> -[ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -> -[ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 -> -[ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 -> -[ 69.936541] sp : ffff80000890b960 -> -[ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: -> -0000000000000000 -> -[ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: -> -0000000000000000 -> -[ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: -> -ffff0000c5254800 -> -[ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: -> -ffffffffffffffff -> -[ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: -> -0000000000000000 -> -[ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: -> -657466612d657375 -> -[ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : -> -ffffa54a8f63d288 -> -[ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : -> -00000000fffff31e -> -[ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : -> -ffff5ab66e5ef000 -> -root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : -> -0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 -> -[ 69.957098] Call trace: -> -[ 69.957959] refcount_warn_saturate+0xa0/0x144 -> -[ 69.958773] get_ndd+0x5c/0x80 -> -[ 69.959294] nd_region_register_namespaces+0xe4/0xe90 -> -[ 69.960253] nd_region_probe+0x100/0x290 -> -[ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 -> -[ 69.962087] really_probe+0x19c/0x3f0 -> -[ 69.962620] __driver_probe_device+0x11c/0x190 -> -[ 69.963258] driver_probe_device+0x44/0xf4 -> -[ 69.963773] __device_attach_driver+0xa4/0x140 -> -[ 69.964471] bus_for_each_drv+0x84/0xe0 -> -[ 69.965068] __device_attach+0xb0/0x1f0 -> -[ 
69.966101] device_initial_probe+0x20/0x30 -> -[ 69.967142] bus_probe_device+0xa4/0xb0 -> -[ 69.968104] device_add+0x3e8/0x910 -> -[ 69.969111] nd_async_device_register+0x24/0x74 -> -[ 69.969928] async_run_entry_fn+0x40/0x150 -> -[ 69.970725] process_one_work+0x1dc/0x450 -> -[ 69.971796] worker_thread+0x154/0x450 -> -[ 69.972700] kthread+0x118/0x120 -> -[ 69.974141] ret_from_fork+0x10/0x20 -> -[ 69.975141] ---[ end trace 0000000000000000 ]--- -> -[ 70.117887] Into nd_namespace_pmem_set_resource() - -On Mon, 15 Aug 2022 15:55:15 -0700 -Dan Williams wrote: - -> -Jonathan Cameron wrote: -> -> On Fri, 12 Aug 2022 16:44:03 +0100 -> -> Jonathan Cameron wrote: -> -> -> -> > On Thu, 11 Aug 2022 18:08:57 +0100 -> -> > Jonathan Cameron via wrote: -> -> > -> -> > > On Tue, 9 Aug 2022 17:08:25 +0100 -> -> > > Jonathan Cameron wrote: -> -> > > -> -> > > > On Tue, 9 Aug 2022 21:07:06 +0800 -> -> > > > Bobo WL wrote: -> -> > > > -> -> > > > > Hi Jonathan -> -> > > > > -> -> > > > > Thanks for your reply! -> -> > > > > -> -> > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron -> -> > > > > wrote: -> -> > > > > > -> -> > > > > > Probably not related to your problem, but there is a disconnect -> -> > > > > > in QEMU / -> -> > > > > > kernel assumptionsaround the presence of an HDM decoder when a HB -> -> > > > > > only -> -> > > > > > has a single root port. Spec allows it to be provided or not as -> -> > > > > > an implementation choice. -> -> > > > > > Kernel assumes it isn't provide. Qemu assumes it is. -> -> > > > > > -> -> > > > > > The temporary solution is to throw in a second root port on the -> -> > > > > > HB and not -> -> > > > > > connect anything to it. Longer term I may special case this so -> -> > > > > > that the particular -> -> > > > > > decoder defaults to pass through settings in QEMU if there is -> -> > > > > > only one root port. -> -> > > > > > -> -> > > > > -> -> > > > > You are right! After adding an extra HB in qemu, I can create a x1 -> -> > > > > region successfully. -> -> > > > > But have some errors in Nvdimm: -> -> > > > > -> -> > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, -> -> > > > > assuming node 0 -> -> > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, -> -> > > > > assuming node 0 -> -> > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe -> -> > > > > -> -> > > > -> -> > > > Ah. I've seen this one, but not chased it down yet. Was on my todo -> -> > > > list to chase -> -> > > > down. Once I reach this state I can verify the HDM Decode is correct -> -> > > > which is what -> -> > > > I've been using to test (Which wasn't true until earlier this week). -> -> > > > I'm currently testing via devmem, more for historical reasons than -> -> > > > because it makes -> -> > > > that much sense anymore. -> -> > > -> -> > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. -> -> > > I'd forgotten that was still on the todo list. I don't think it will -> -> > > be particularly hard to do and will take a look in next few days. -> -> > > -> -> > > Very very indirectly this error is causing a driver probe fail that -> -> > > means that -> -> > > we hit a code path that has a rather odd looking check on NDD_LABELING. -> -> > > Should not have gotten near that path though - hence the problem is -> -> > > actually -> -> > > when we call cxl_pmem_get_config_data() and it returns an error because -> -> > > we haven't fully connected up the command in QEMU. -> -> > -> -> > So a least one bug in QEMU. 
We were not supporting variable length -> -> > payloads on mailbox -> -> > inputs (but were on outputs). That hasn't mattered until we get to LSA -> -> > writes. -> -> > We just need to relax condition on the supplied length. -> -> > -> -> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c -> -> > index c352a935c4..fdda9529fe 100644 -> -> > --- a/hw/cxl/cxl-mailbox-utils.c -> -> > +++ b/hw/cxl/cxl-mailbox-utils.c -> -> > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) -> -> > cxl_cmd = &cxl_cmd_set[set][cmd]; -> -> > h = cxl_cmd->handler; -> -> > if (h) { -> -> > - if (len == cxl_cmd->in) { -> -> > + if (len == cxl_cmd->in || !cxl_cmd->in) { -> -> Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. -> -> -> -> With that fixed we hit new fun paths - after some errors we get the -> -> worrying - not totally sure but looks like a failure on an error cleanup. -> -> I'll chase down the error source, but even then this is probably -> -> triggerable by -> -> hardware problem or similar. Some bonus prints in here from me chasing -> -> error paths, but it's otherwise just cxl/next + the fix I posted earlier -> -> today. -> -> -One of the scenarios that I cannot rule out is nvdimm_probe() racing -> -nd_region_probe(), but given all the work it takes to create a region I -> -suspect all the nvdimm_probe() work to have completed... -> -> -It is at least one potentially wrong hypothesis that needs to be chased -> -down. -Maybe there should be a special award for the non-intuitive -ndctl create-namespace command (modifies existing namespace and might create -a different empty one...) I'm sure there is some interesting history behind -that one :) - -Upshot is I just threw a filesystem on fsdax and wrote some text files on it -to allow easy grepping. The right data ends up in the memory and a plausible -namespace description is stored in the LSA. - -So to some degree at least it's 'working' on an 8 way direct connected -set of emulated devices. - -One snag is that serial number support isn't yet upstream in QEMU. -(I have had it in my tree for a while but not posted it yet because of - QEMU feature freeze) -https://gitlab.com/jic23/qemu/-/commit/144c783ea8a5fbe169f46ea1ba92940157f42733 -That's needed for meaningful cookie generation. Otherwise you can build the -namespace once, but it won't work on next probe as the cookie is 0 and you -hit some error paths. - -Maybe sensible to add a sanity check and fail namespace creation if -cookie is 0? (Silly side question, but is there a theoretical risk of -a serial number / other data combination leading to a fletcher64() -checksum that happens to be 0 - that would give a very odd bug report!) - -So to make it work the following is needed: - -1) The kernel fix for mailbox buffer overflow. -2) Qemu fix for size of arguements for get_lsa -3) Qemu fix to allow variable size input arguements (for set_lsa) -4) Serial number patch above + command lines to qemu to set appropriate - serial numbers. - -I'll send out the QEMU fixes shortly and post the Serial number patch, -though that almost certainly won't go in until next QEMU development -cycle starts in a few weeks. - -Next up, run through same tests on some other topologies. - -Jonathan - -> -> -> -> -> [ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) -> -> [ 69.920108] nd_region_probe -> -> [ 69.920623] ------------[ cut here ]------------ -> -> [ 69.920675] refcount_t: addition on 0; use-after-free. 
-> -> [ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 -> -> refcount_warn_saturate+0xa0/0x144 -> -> [ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port -> -> cxl_acpi cxl_core -> -> [ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ -> -> #399 -> -> [ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 -> -> 02/06/2015 -> -> [ 69.931482] Workqueue: events_unbound async_run_entry_fn -> -> [ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS -> -> BTYPE=--) -> -> [ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 -> -> [ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 -> -> [ 69.936541] sp : ffff80000890b960 -> -> [ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: -> -> 0000000000000000 -> -> [ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: -> -> 0000000000000000 -> -> [ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: -> -> ffff0000c5254800 -> -> [ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: -> -> ffffffffffffffff -> -> [ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: -> -> 0000000000000000 -> -> [ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: -> -> 657466612d657375 -> -> [ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : -> -> ffffa54a8f63d288 -> -> [ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : -> -> 00000000fffff31e -> -> [ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : -> -> ffff5ab66e5ef000 -> -> root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : -> -> 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 -> -> [ 69.957098] Call trace: -> -> [ 69.957959] refcount_warn_saturate+0xa0/0x144 -> -> [ 69.958773] get_ndd+0x5c/0x80 -> -> [ 69.959294] nd_region_register_namespaces+0xe4/0xe90 -> -> [ 69.960253] nd_region_probe+0x100/0x290 -> -> [ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 -> -> [ 69.962087] really_probe+0x19c/0x3f0 -> -> [ 69.962620] __driver_probe_device+0x11c/0x190 -> -> [ 69.963258] driver_probe_device+0x44/0xf4 -> -> [ 69.963773] __device_attach_driver+0xa4/0x140 -> -> [ 69.964471] bus_for_each_drv+0x84/0xe0 -> -> [ 69.965068] __device_attach+0xb0/0x1f0 -> -> [ 69.966101] device_initial_probe+0x20/0x30 -> -> [ 69.967142] bus_probe_device+0xa4/0xb0 -> -> [ 69.968104] device_add+0x3e8/0x910 -> -> [ 69.969111] nd_async_device_register+0x24/0x74 -> -> [ 69.969928] async_run_entry_fn+0x40/0x150 -> -> [ 69.970725] process_one_work+0x1dc/0x450 -> -> [ 69.971796] worker_thread+0x154/0x450 -> -> [ 69.972700] kthread+0x118/0x120 -> -> [ 69.974141] ret_from_fork+0x10/0x20 -> -> [ 69.975141] ---[ end trace 0000000000000000 ]--- -> -> [ 70.117887] Into nd_namespace_pmem_set_resource() - -Bobo WL wrote: -> -Hi list -> -> -I want to test cxl functions in arm64, and found some problems I can't -> -figure out. -> -> -My test environment: -> -> -1. build latest bios from -https://github.com/tianocore/edk2.git -master -> -branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) -> -2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git -> -master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm -> -support patch: -> -https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ -> -3. build Linux kernel from -> -https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git -preview -> -branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) -> -4. 
build latest ndctl tools from -https://github.com/pmem/ndctl -> -create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) -> -> -And my qemu test commands: -> -sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ -> --cpu max -smp 8 -nographic -no-reboot \ -> --kernel $KERNEL -bios $BIOS_BIN \ -> --drive if=none,file=$ROOTFS,format=qcow2,id=hd \ -> --device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 -> -nokaslr dyndbg="module cxl* +p"' \ -> --object memory-backend-ram,size=4G,id=mem0 \ -> --numa node,nodeid=0,cpus=0-7,memdev=mem0 \ -> --net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ -> --object -> -memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M -> -\ -> --object -> -memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M -> -\ -> --device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -> --device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ -> --device cxl-upstream,bus=root_port0,id=us0 \ -> --device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ -> --device -> -cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ -> --device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ -> --device -> -cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ -> --device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ -> --device -> -cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ -> --device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ -> --device -> -cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ -> --M -> -cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k -> -> -And I have got two problems. -> -1. When I want to create x1 region with command: "cxl create-region -d -> -decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer -> -reference. 
Crash log: -> -> -[ 534.697324] cxl_region region0: config state: 0 -> -[ 534.697346] cxl_region region0: probe: -6 -> -[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 -> -[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: -> -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -> -[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -> -[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -> -[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -> -[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -> -for mem0:decoder3.0 @ 0 -> -[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 -> -[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = -> -0000:0e:00.0 for mem0:decoder3.0 @ 0 -> -[ 534.699405] Unable to handle kernel NULL pointer dereference at -> -virtual address 0000000000000000 -> -[ 534.701474] Mem abort info: -> -[ 534.701994] ESR = 0x0000000086000004 -> -[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits -> -[ 534.703616] SET = 0, FnV = 0 -> -[ 534.704174] EA = 0, S1PTW = 0 -> -[ 534.704803] FSC = 0x04: level 0 translation fault -> -[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 -> -[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 -> -[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP -> -[ 534.710301] Modules linked in: -> -[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted -> -5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 -> -[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 -> -[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) -> -[ 534.719190] pc : 0x0 -> -[ 534.719928] lr : commit_store+0x118/0x2cc -> -[ 534.721007] sp : ffff80000aec3c30 -> -[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: -> -ffff0000c0c06b30 -> -[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: -> -ffff0000c0a29400 -> -[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: -> -ffff0000c0c06800 -> -[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: -> -0000000000000000 -> -[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: -> -0000ffffd41fe838 -> -[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: -> -0000000000000000 -> -[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : -> -0000000000000000 -> -[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : -> -ffff0000c0906e80 -> -[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : -> -ffff80000aec3bf0 -> -[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : -> -ffff0000c155a000 -> -[ 534.738878] Call trace: -> -[ 534.739368] 0x0 -> -[ 534.739713] dev_attr_store+0x1c/0x30 -> -[ 534.740186] sysfs_kf_write+0x48/0x58 -> -[ 534.740961] kernfs_fop_write_iter+0x128/0x184 -> -[ 534.741872] new_sync_write+0xdc/0x158 -> -[ 534.742706] vfs_write+0x1ac/0x2a8 -> -[ 534.743440] ksys_write+0x68/0xf0 -> -[ 534.744328] __arm64_sys_write+0x1c/0x28 -> -[ 534.745180] invoke_syscall+0x44/0xf0 -> -[ 534.745989] el0_svc_common+0x4c/0xfc -> -[ 534.746661] do_el0_svc+0x60/0xa8 -> -[ 534.747378] el0_svc+0x2c/0x78 -> -[ 534.748066] el0t_64_sync_handler+0xb8/0x12c -> -[ 534.748919] el0t_64_sync+0x18c/0x190 -> -[ 534.749629] Code: bad PC value -> -[ 534.750169] ---[ end trace 0000000000000000 ]--- -What was the top kernel commit when you ran this test? 
What is the line -number of "commit_store+0x118"? - -> -2. When I want to create x4 region with command: "cxl create-region -d -> -decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". I got below errors: -> -> -cxl region: create_region: region0: failed to set target3 to mem3 -> -cxl region: cmd_create_region: created 0 regions -> -> -And kernel log as below: -> -[ 60.536663] cxl_region region0: config state: 0 -> -[ 60.536675] cxl_region region0: probe: -6 -> -[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 -> -[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: -> -mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 -> -[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 -> -[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: -> -mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 -> -[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 -> -[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: -> -mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 -> -[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 -> -[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: -> -mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 -> -[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: -> -mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 -> -[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: -> -mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 -> -[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 -> -[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 -> -for mem0:decoder3.0 @ 0 -> -[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 -> -[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = -> -0000:0e:00.0 for mem0:decoder3.0 @ 0 -> -[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at -> -1 -> -> -I have tried to write sysfs node manually, got same errors. -> -> -Hope I can get some helps here. -What is the output of: - - cxl list -MDTu -d decoder0.0 - -...? It might be the case that mem1 cannot be mapped by decoder0.0, or -at least not in the specified order, or that validation check is broken. - -Hi Dan, - -Thanks for your reply! - -On Mon, Aug 8, 2022 at 11:58 PM Dan Williams wrote: -> -> -What is the output of: -> -> -cxl list -MDTu -d decoder0.0 -> -> -...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -at least not in the specified order, or that validation check is broken. 
-Command "cxl list -MDTu -d decoder0.0" output: - -[ - { - "memdevs":[ - { - "memdev":"mem2", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:11:00.0" - }, - { - "memdev":"mem1", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:10:00.0" - }, - { - "memdev":"mem0", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:0f:00.0" - }, - { - "memdev":"mem3", - "pmem_size":"256.00 MiB (268.44 MB)", - "ram_size":0, - "serial":"0", - "host":"0000:12:00.0" - } - ] - }, - { - "root decoders":[ - { - "decoder":"decoder0.0", - "resource":"0x10000000000", - "size":"4.00 GiB (4.29 GB)", - "pmem_capable":true, - "volatile_capable":true, - "accelmem_capable":true, - "nr_targets":1, - "targets":[ - { - "target":"ACPI0016:01", - "alias":"pci0000:0c", - "position":0, - "id":"0xc" - } - ] - } - ] - } -] - -Bobo WL wrote: -> -Hi Dan, -> -> -Thanks for your reply! -> -> -On Mon, Aug 8, 2022 at 11:58 PM Dan Williams wrote: -> -> -> -> What is the output of: -> -> -> -> cxl list -MDTu -d decoder0.0 -> -> -> -> ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> at least not in the specified order, or that validation check is broken. -> -> -Command "cxl list -MDTu -d decoder0.0" output: -Thanks for this, I think I know the problem, but will try some -experiments with cxl_test first. - -Did the commit_store() crash stop reproducing with latest cxl/preview -branch? - -On Tue, Aug 9, 2022 at 11:17 PM Dan Williams wrote: -> -> -Bobo WL wrote: -> -> Hi Dan, -> -> -> -> Thanks for your reply! -> -> -> -> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams -> -> wrote: -> -> > -> -> > What is the output of: -> -> > -> -> > cxl list -MDTu -d decoder0.0 -> -> > -> -> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> > at least not in the specified order, or that validation check is broken. -> -> -> -> Command "cxl list -MDTu -d decoder0.0" output: -> -> -Thanks for this, I think I know the problem, but will try some -> -experiments with cxl_test first. -> -> -Did the commit_store() crash stop reproducing with latest cxl/preview -> -branch? -No, still hitting this bug if don't add extra HB device in qemu - -Dan Williams wrote: -> -Bobo WL wrote: -> -> Hi Dan, -> -> -> -> Thanks for your reply! -> -> -> -> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams -> -> wrote: -> -> > -> -> > What is the output of: -> -> > -> -> > cxl list -MDTu -d decoder0.0 -> -> > -> -> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> > at least not in the specified order, or that validation check is broken. -> -> -> -> Command "cxl list -MDTu -d decoder0.0" output: -> -> -Thanks for this, I think I know the problem, but will try some -> -experiments with cxl_test first. -Hmm, so my cxl_test experiment unfortunately passed so I'm not -reproducing the failure mode. 
This is the result of creating x4 region -with devices directly attached to a single host-bridge: - -# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30)) -{ - "region":"region8", - "resource":"0xf1f0000000", - "size":"1024.00 MiB (1073.74 MB)", - "interleave_ways":4, - "interleave_granularity":256, - "decode_state":"commit", - "mappings":[ - { - "position":3, - "memdev":"mem11", - "decoder":"decoder21.0" - }, - { - "position":2, - "memdev":"mem9", - "decoder":"decoder19.0" - }, - { - "position":1, - "memdev":"mem10", - "decoder":"decoder20.0" - }, - { - "position":0, - "memdev":"mem12", - "decoder":"decoder22.0" - } - ] -} -cxl region: cmd_create_region: created 1 region - -> -Did the commit_store() crash stop reproducing with latest cxl/preview -> -branch? -I missed the answer to this question. - -All of these changes are now in Linus' tree perhaps give that a try and -post the debug log again? - -On Thu, 11 Aug 2022 17:46:55 -0700 -Dan Williams wrote: - -> -Dan Williams wrote: -> -> Bobo WL wrote: -> -> > Hi Dan, -> -> > -> -> > Thanks for your reply! -> -> > -> -> > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams -> -> > wrote: -> -> > > -> -> > > What is the output of: -> -> > > -> -> > > cxl list -MDTu -d decoder0.0 -> -> > > -> -> > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> > > at least not in the specified order, or that validation check is -> -> > > broken. -> -> > -> -> > Command "cxl list -MDTu -d decoder0.0" output: -> -> -> -> Thanks for this, I think I know the problem, but will try some -> -> experiments with cxl_test first. -> -> -Hmm, so my cxl_test experiment unfortunately passed so I'm not -> -reproducing the failure mode. This is the result of creating x4 region -> -with devices directly attached to a single host-bridge: -> -> -# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30)) -> -{ -> -"region":"region8", -> -"resource":"0xf1f0000000", -> -"size":"1024.00 MiB (1073.74 MB)", -> -"interleave_ways":4, -> -"interleave_granularity":256, -> -"decode_state":"commit", -> -"mappings":[ -> -{ -> -"position":3, -> -"memdev":"mem11", -> -"decoder":"decoder21.0" -> -}, -> -{ -> -"position":2, -> -"memdev":"mem9", -> -"decoder":"decoder19.0" -> -}, -> -{ -> -"position":1, -> -"memdev":"mem10", -> -"decoder":"decoder20.0" -> -}, -> -{ -> -"position":0, -> -"memdev":"mem12", -> -"decoder":"decoder22.0" -> -} -> -] -> -} -> -cxl region: cmd_create_region: created 1 region -> -> -> Did the commit_store() crash stop reproducing with latest cxl/preview -> -> branch? -> -> -I missed the answer to this question. -> -> -All of these changes are now in Linus' tree perhaps give that a try and -> -post the debug log again? -Hi Dan, - -I've moved onto looking at this one. -1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy that -up -at some stage), 1 switch, 4 downstream switch ports each with a type 3 - -I'm not getting a crash, but can't successfully setup a region. -Upon adding the final target -It's failing in check_last_peer() as pos < distance. -Seems distance is 4 which makes me think it's using the wrong level of the -heirarchy for -some reason or that distance check is wrong. -Wasn't a good idea to just skip that step though as it goes boom - though -stack trace is not useful. 
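To make that failure mode concrete, a simplified sketch of the check being tripped (this is only the shape of it, not the exact drivers/cxl/core/region.c code): with one root port feeding a four-port switch the computed distance comes out as 4, while the final endpoint sits at position 3, so pos < distance holds and region assembly bails out.

    /* sketch only: the peer-distance test described above */
    static int check_last_peer_sketch(int pos, int distance)
    {
            if (pos < distance)
                    return -ENXIO;  /* surfaces as the failed target assignment */
            return 0;
    }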
- -Jonathan - -On Wed, 17 Aug 2022 17:16:19 +0100 -Jonathan Cameron wrote: - -> -On Thu, 11 Aug 2022 17:46:55 -0700 -> -Dan Williams wrote: -> -> -> Dan Williams wrote: -> -> > Bobo WL wrote: -> -> > > Hi Dan, -> -> > > -> -> > > Thanks for your reply! -> -> > > -> -> > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams -> -> > > wrote: -> -> > > > -> -> > > > What is the output of: -> -> > > > -> -> > > > cxl list -MDTu -d decoder0.0 -> -> > > > -> -> > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or -> -> > > > at least not in the specified order, or that validation check is -> -> > > > broken. -> -> > > -> -> > > Command "cxl list -MDTu -d decoder0.0" output: -> -> > -> -> > Thanks for this, I think I know the problem, but will try some -> -> > experiments with cxl_test first. -> -> -> -> Hmm, so my cxl_test experiment unfortunately passed so I'm not -> -> reproducing the failure mode. This is the result of creating x4 region -> -> with devices directly attached to a single host-bridge: -> -> -> -> # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s -> -> $((1<<30)) -> -> { -> -> "region":"region8", -> -> "resource":"0xf1f0000000", -> -> "size":"1024.00 MiB (1073.74 MB)", -> -> "interleave_ways":4, -> -> "interleave_granularity":256, -> -> "decode_state":"commit", -> -> "mappings":[ -> -> { -> -> "position":3, -> -> "memdev":"mem11", -> -> "decoder":"decoder21.0" -> -> }, -> -> { -> -> "position":2, -> -> "memdev":"mem9", -> -> "decoder":"decoder19.0" -> -> }, -> -> { -> -> "position":1, -> -> "memdev":"mem10", -> -> "decoder":"decoder20.0" -> -> }, -> -> { -> -> "position":0, -> -> "memdev":"mem12", -> -> "decoder":"decoder22.0" -> -> } -> -> ] -> -> } -> -> cxl region: cmd_create_region: created 1 region -> -> -> -> > Did the commit_store() crash stop reproducing with latest cxl/preview -> -> > branch? -> -> -> -> I missed the answer to this question. -> -> -> -> All of these changes are now in Linus' tree perhaps give that a try and -> -> post the debug log again? -> -> -Hi Dan, -> -> -I've moved onto looking at this one. -> -1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy -> -that up -> -at some stage), 1 switch, 4 downstream switch ports each with a type 3 -> -> -I'm not getting a crash, but can't successfully setup a region. -> -Upon adding the final target -> -It's failing in check_last_peer() as pos < distance. -> -Seems distance is 4 which makes me think it's using the wrong level of the -> -heirarchy for -> -some reason or that distance check is wrong. -> -Wasn't a good idea to just skip that step though as it goes boom - though -> -stack trace is not useful. -Turns out really weird corruption happens if you accidentally back two type3 -devices -with the same memory device. Who would have thought it :) - -That aside ignoring the check_last_peer() failure seems to make everything work -for this -topology. I'm not seeing the crash, so my guess is we fixed it somewhere along -the way. - -Now for the fun one. I've replicated the crash if we have - -1HB 1*RP 1SW, 4SW-DSP, 4Type3 - -Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be -programmed -but the null pointer dereference isn't related to that. - -The bug is straight forward. Not all decoders have commit callbacks... Will -send out -a possible fix shortly. 
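A minimal sketch of the guard that finding implies (the fix actually posted to the list may be structured differently; the callback name just follows the description above):

    /* sketch: only call a decoder's commit callback if one was registered.
     * Branching through a NULL pointer here is what shows up as the
     * "pc : 0x0, lr : commit_store+0x118" oops quoted earlier. */
    if (cxld->commit)
            rc = cxld->commit(cxld);
    else
            rc = 0; /* nothing to program for this decoder */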
- -Jonathan - - - -> -> -Jonathan -> -> -> -> -> -> - -On Thu, 18 Aug 2022 17:37:40 +0100 -Jonathan Cameron via wrote: - -> -On Wed, 17 Aug 2022 17:16:19 +0100 -> -Jonathan Cameron wrote: -> -> -> On Thu, 11 Aug 2022 17:46:55 -0700 -> -> Dan Williams wrote: -> -> -> -> > Dan Williams wrote: -> -> > > Bobo WL wrote: -> -> > > > Hi Dan, -> -> > > > -> -> > > > Thanks for your reply! -> -> > > > -> -> > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams -> -> > > > wrote: -> -> > > > > -> -> > > > > What is the output of: -> -> > > > > -> -> > > > > cxl list -MDTu -d decoder0.0 -> -> > > > > -> -> > > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, -> -> > > > > or -> -> > > > > at least not in the specified order, or that validation check is -> -> > > > > broken. -> -> > > > -> -> > > > Command "cxl list -MDTu -d decoder0.0" output: -> -> > > -> -> > > Thanks for this, I think I know the problem, but will try some -> -> > > experiments with cxl_test first. -> -> > -> -> > Hmm, so my cxl_test experiment unfortunately passed so I'm not -> -> > reproducing the failure mode. This is the result of creating x4 region -> -> > with devices directly attached to a single host-bridge: -> -> > -> -> > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s -> -> > $((1<<30)) -> -> > { -> -> > "region":"region8", -> -> > "resource":"0xf1f0000000", -> -> > "size":"1024.00 MiB (1073.74 MB)", -> -> > "interleave_ways":4, -> -> > "interleave_granularity":256, -> -> > "decode_state":"commit", -> -> > "mappings":[ -> -> > { -> -> > "position":3, -> -> > "memdev":"mem11", -> -> > "decoder":"decoder21.0" -> -> > }, -> -> > { -> -> > "position":2, -> -> > "memdev":"mem9", -> -> > "decoder":"decoder19.0" -> -> > }, -> -> > { -> -> > "position":1, -> -> > "memdev":"mem10", -> -> > "decoder":"decoder20.0" -> -> > }, -> -> > { -> -> > "position":0, -> -> > "memdev":"mem12", -> -> > "decoder":"decoder22.0" -> -> > } -> -> > ] -> -> > } -> -> > cxl region: cmd_create_region: created 1 region -> -> > -> -> > > Did the commit_store() crash stop reproducing with latest cxl/preview -> -> > > branch? -> -> > -> -> > I missed the answer to this question. -> -> > -> -> > All of these changes are now in Linus' tree perhaps give that a try and -> -> > post the debug log again? -> -> -> -> Hi Dan, -> -> -> -> I've moved onto looking at this one. -> -> 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy -> -> that up -> -> at some stage), 1 switch, 4 downstream switch ports each with a type 3 -> -> -> -> I'm not getting a crash, but can't successfully setup a region. -> -> Upon adding the final target -> -> It's failing in check_last_peer() as pos < distance. -> -> Seems distance is 4 which makes me think it's using the wrong level of the -> -> heirarchy for -> -> some reason or that distance check is wrong. -> -> Wasn't a good idea to just skip that step though as it goes boom - though -> -> stack trace is not useful. -> -> -Turns out really weird corruption happens if you accidentally back two type3 -> -devices -> -with the same memory device. Who would have thought it :) -> -> -That aside ignoring the check_last_peer() failure seems to make everything -> -work for this -> -topology. I'm not seeing the crash, so my guess is we fixed it somewhere -> -along the way. -> -> -Now for the fun one. 
I've replicated the crash if we have -> -> -1HB 1*RP 1SW, 4SW-DSP, 4Type3 -> -> -Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be -> -programmed -> -but the null pointer dereference isn't related to that. -> -> -The bug is straight forward. Not all decoders have commit callbacks... Will -> -send out -> -a possible fix shortly. -> -For completeness I'm carrying this hack because I haven't gotten my head -around the right fix for check_last_peer() failing on this test topology. - -diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c -index c49d9a5f1091..275e143bd748 100644 ---- a/drivers/cxl/core/region.c -+++ b/drivers/cxl/core/region.c -@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, - rc = check_last_peer(cxled, ep, cxl_rr, - distance); - if (rc) -- return rc; -+ // return rc; - goto out_target_set; - } - goto add_target; --- - -I might find more bugs with more testing, but this is all the ones I've -seen so far + in Bobo's reports. Qemu fixes are now in upstream so -will be there in the release. - -As a reminder, testing on QEMU has a few corners... - -Need a patch to add serial number ECAP support. It is on list for revew, -but will have wait for after QEMU 7.1 release (which may be next week) - -QEMU still assumes HDM decoder on the host bridge will be programmed. -So if you want anything to work there should be at least -2 RP below the HB (no need to plug anything in to one of them). - -I don't want to add a commandline parameter to hide the decoder in QEMU -and detecting there is only one RP would require moving a bunch of static -stuff into runtime code (I think). - -I still think we should make the kernel check to see if there is a decoder, -but if not I might see how bad a hack it is to have QEMU ignore that decoder -if not committed in this one special case (HB HDM decoder with only one place -it can send stuff). Obviously that would be a break from specification -so less than idea! - -Thanks, - -Jonathan - -On Fri, 19 Aug 2022 09:46:55 +0100 -Jonathan Cameron wrote: - -> -On Thu, 18 Aug 2022 17:37:40 +0100 -> -Jonathan Cameron via wrote: -> -> -> On Wed, 17 Aug 2022 17:16:19 +0100 -> -> Jonathan Cameron wrote: -> -> -> -> > On Thu, 11 Aug 2022 17:46:55 -0700 -> -> > Dan Williams wrote: -> -> > -> -> > > Dan Williams wrote: -> -> > > > Bobo WL wrote: -> -> > > > > Hi Dan, -> -> > > > > -> -> > > > > Thanks for your reply! -> -> > > > > -> -> > > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams -> -> > > > > wrote: -> -> > > > > > -> -> > > > > > What is the output of: -> -> > > > > > -> -> > > > > > cxl list -MDTu -d decoder0.0 -> -> > > > > > -> -> > > > > > ...? It might be the case that mem1 cannot be mapped by -> -> > > > > > decoder0.0, or -> -> > > > > > at least not in the specified order, or that validation check is -> -> > > > > > broken. -> -> > > > > -> -> > > > > Command "cxl list -MDTu -d decoder0.0" output: -> -> > > > -> -> > > > Thanks for this, I think I know the problem, but will try some -> -> > > > experiments with cxl_test first. -> -> > > -> -> > > Hmm, so my cxl_test experiment unfortunately passed so I'm not -> -> > > reproducing the failure mode. 
This is the result of creating x4 region -> -> > > with devices directly attached to a single host-bridge: -> -> > > -> -> > > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s -> -> > > $((1<<30)) -> -> > > { -> -> > > "region":"region8", -> -> > > "resource":"0xf1f0000000", -> -> > > "size":"1024.00 MiB (1073.74 MB)", -> -> > > "interleave_ways":4, -> -> > > "interleave_granularity":256, -> -> > > "decode_state":"commit", -> -> > > "mappings":[ -> -> > > { -> -> > > "position":3, -> -> > > "memdev":"mem11", -> -> > > "decoder":"decoder21.0" -> -> > > }, -> -> > > { -> -> > > "position":2, -> -> > > "memdev":"mem9", -> -> > > "decoder":"decoder19.0" -> -> > > }, -> -> > > { -> -> > > "position":1, -> -> > > "memdev":"mem10", -> -> > > "decoder":"decoder20.0" -> -> > > }, -> -> > > { -> -> > > "position":0, -> -> > > "memdev":"mem12", -> -> > > "decoder":"decoder22.0" -> -> > > } -> -> > > ] -> -> > > } -> -> > > cxl region: cmd_create_region: created 1 region -> -> > > -> -> > > > Did the commit_store() crash stop reproducing with latest cxl/preview -> -> > > > branch? -> -> > > -> -> > > I missed the answer to this question. -> -> > > -> -> > > All of these changes are now in Linus' tree perhaps give that a try and -> -> > > post the debug log again? -> -> > -> -> > Hi Dan, -> -> > -> -> > I've moved onto looking at this one. -> -> > 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy -> -> > that up -> -> > at some stage), 1 switch, 4 downstream switch ports each with a type 3 -> -> > -> -> > I'm not getting a crash, but can't successfully setup a region. -> -> > Upon adding the final target -> -> > It's failing in check_last_peer() as pos < distance. -> -> > Seems distance is 4 which makes me think it's using the wrong level of -> -> > the heirarchy for -> -> > some reason or that distance check is wrong. -> -> > Wasn't a good idea to just skip that step though as it goes boom - though -> -> > stack trace is not useful. -> -> -> -> Turns out really weird corruption happens if you accidentally back two -> -> type3 devices -> -> with the same memory device. Who would have thought it :) -> -> -> -> That aside ignoring the check_last_peer() failure seems to make everything -> -> work for this -> -> topology. I'm not seeing the crash, so my guess is we fixed it somewhere -> -> along the way. -> -> -> -> Now for the fun one. I've replicated the crash if we have -> -> -> -> 1HB 1*RP 1SW, 4SW-DSP, 4Type3 -> -> -> -> Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be -> -> programmed -> -> but the null pointer dereference isn't related to that. -> -> -> -> The bug is straight forward. Not all decoders have commit callbacks... -> -> Will send out -> -> a possible fix shortly. -> -> -> -For completeness I'm carrying this hack because I haven't gotten my head -> -around the right fix for check_last_peer() failing on this test topology. -> -> -diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c -> -index c49d9a5f1091..275e143bd748 100644 -> ---- a/drivers/cxl/core/region.c -> -+++ b/drivers/cxl/core/region.c -> -@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, -> -rc = check_last_peer(cxled, ep, cxl_rr, -> -distance); -> -if (rc) -> -- return rc; -> -+ // return rc; -> -goto out_target_set; -> -} -> -goto add_target; -I'm still carrying this hack and still haven't worked out the right fix. - -Suggestions welcome! If not I'll hopefully get some time on this -towards the end of the week. 
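For anyone reproducing this, the "at least 2 RP below the HB" workaround is just a second root-port device on the same pxb-cxl bus, in the style of the command line quoted earlier in the thread (the id/chassis/slot values for the unused port are illustrative):

 -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
 -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
 -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \

Nothing needs to be attached below root_port1; with two root ports present the kernel programs the host-bridge HDM decoder as well, which matches QEMU's current assumption that it exists.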
- -Jonathan - diff --git a/classification_output/01/instruction/2880487 b/classification_output/01/instruction/2880487 deleted file mode 100644 index 1d455d6fa..000000000 --- a/classification_output/01/instruction/2880487 +++ /dev/null @@ -1,187 +0,0 @@ -instruction: 0.925 -semantic: 0.924 -other: 0.894 -mistranslation: 0.826 - -[BUG] AArch64 boot hang with -icount and -smp >1 (iothread locking issue?) - -Hello, - -I am encountering one or more bugs when using -icount and -smp >1 that I am -attempting to sort out. My current theory is that it is an iothread locking -issue. - -I am using a command-line like the following where $kernel is a recent upstream -AArch64 Linux kernel Image (I can provide a binary if that would be helpful - -let me know how is best to post): - - qemu-system-aarch64 \ - -M virt -cpu cortex-a57 -m 1G \ - -nographic \ - -smp 2 \ - -icount 0 \ - -kernel $kernel - -For any/all of the symptoms described below, they seem to disappear when I -either remove `-icount 0` or change smp to `-smp 1`. In other words, it is the -combination of `-smp >1` and `-icount` which triggers what I'm seeing. - -I am seeing two different (but seemingly related) behaviors. The first (and -what I originally started debugging) shows up as a boot hang. When booting -using the above command after Peter's "icount: Take iothread lock when running -QEMU timers" patch [1], The kernel boots for a while and then hangs after: - -> -...snip... -> -[ 0.010764] Serial: AMBA PL011 UART driver -> -[ 0.016334] 9000000.pl011: ttyAMA0 at MMIO 0x9000000 (irq = 13, base_baud -> -= 0) is a PL011 rev1 -> -[ 0.016907] printk: console [ttyAMA0] enabled -> -[ 0.017624] KASLR enabled -> -[ 0.031986] HugeTLB: registered 16.0 GiB page size, pre-allocated 0 pages -> -[ 0.031986] HugeTLB: 16320 KiB vmemmap can be freed for a 16.0 GiB page -> -[ 0.031986] HugeTLB: registered 512 MiB page size, pre-allocated 0 pages -> -[ 0.031986] HugeTLB: 448 KiB vmemmap can be freed for a 512 MiB page -> -[ 0.031986] HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages -> -[ 0.031986] HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page -When it hangs here, I drop into QEMU's console, attach to the gdbserver, and it -always reports that it is at address 0xffff800008dc42e8 (as shown below from an -objdump of the vmlinux). 
I note this is in the middle of messing with timer -system registers - which makes me suspect we're attempting to take the iothread -lock when its already held: - -> -ffff800008dc42b8 : -> -ffff800008dc42b8: d503201f nop -> -ffff800008dc42bc: d503201f nop -> -ffff800008dc42c0: d503233f paciasp -> -ffff800008dc42c4: d53be321 mrs x1, cntv_ctl_el0 -> -ffff800008dc42c8: 32000021 orr w1, w1, #0x1 -> -ffff800008dc42cc: d5033fdf isb -> -ffff800008dc42d0: d53be042 mrs x2, cntvct_el0 -> -ffff800008dc42d4: ca020043 eor x3, x2, x2 -> -ffff800008dc42d8: 8b2363e3 add x3, sp, x3 -> -ffff800008dc42dc: f940007f ldr xzr, [x3] -> -ffff800008dc42e0: 8b020000 add x0, x0, x2 -> -ffff800008dc42e4: d51be340 msr cntv_cval_el0, x0 -> -* ffff800008dc42e8: 927ef820 and x0, x1, #0xfffffffffffffffd -> -ffff800008dc42ec: d51be320 msr cntv_ctl_el0, x0 -> -ffff800008dc42f0: d5033fdf isb -> -ffff800008dc42f4: 52800000 mov w0, #0x0 -> -// #0 -> -ffff800008dc42f8: d50323bf autiasp -> -ffff800008dc42fc: d65f03c0 ret -The second behavior is that prior to Peter's "icount: Take iothread lock when -running QEMU timers" patch [1], I observe the following message (same command -as above): - -> -ERROR:../accel/tcg/tcg-accel-ops.c:79:tcg_handle_interrupt: assertion failed: -> -(qemu_mutex_iothread_locked()) -> -Aborted (core dumped) -This is the same behavior described in Gitlab issue 1130 [0] and addressed by -[1]. I bisected the appearance of this assertion, and found it was introduced -by Pavel's "replay: rewrite async event handling" commit [2]. Commits prior to -that one boot successfully (neither assertions nor hangs) with `-icount 0 -smp -2`. - -I've looked over these two commits ([1], [2]), but it is not obvious to me -how/why they might be interacting to produce the boot hangs I'm seeing and -I welcome any help investigating further. - -Thanks! - --Aaron Lindsay - -[0] - -https://gitlab.com/qemu-project/qemu/-/issues/1130 -[1] - -https://gitlab.com/qemu-project/qemu/-/commit/c7f26ded6d5065e4116f630f6a490b55f6c5f58e -[2] - -https://gitlab.com/qemu-project/qemu/-/commit/60618e2d77691e44bb78e23b2b0cf07b5c405e56 - -On Fri, 21 Oct 2022 at 16:48, Aaron Lindsay - wrote: -> -> -Hello, -> -> -I am encountering one or more bugs when using -icount and -smp >1 that I am -> -attempting to sort out. My current theory is that it is an iothread locking -> -issue. -Weird coincidence, that is a bug that's been in the tree for months -but was only reported to me earlier this week. Try reverting -commit a82fd5a4ec24d923ff1e -- that should fix it. -CAFEAcA_i8x00hD-4XX18ySLNbCB6ds1-DSazVb4yDnF8skjd9A@mail.gmail.com -/">https://lore.kernel.org/qemu-devel/ -CAFEAcA_i8x00hD-4XX18ySLNbCB6ds1-DSazVb4yDnF8skjd9A@mail.gmail.com -/ -has the explanation. - -thanks --- PMM - -On Oct 21 17:00, Peter Maydell wrote: -> -On Fri, 21 Oct 2022 at 16:48, Aaron Lindsay -> - wrote: -> -> -> -> Hello, -> -> -> -> I am encountering one or more bugs when using -icount and -smp >1 that I am -> -> attempting to sort out. My current theory is that it is an iothread locking -> -> issue. -> -> -Weird coincidence, that is a bug that's been in the tree for months -> -but was only reported to me earlier this week. Try reverting -> -commit a82fd5a4ec24d923ff1e -- that should fix it. -I can confirm that reverting a82fd5a4ec24d923ff1e fixes it for me. -Thanks for the help and fast response! 
- --Aaron - diff --git a/classification_output/01/instruction/33802194 b/classification_output/01/instruction/33802194 new file mode 100644 index 000000000..b8e563ad9 --- /dev/null +++ b/classification_output/01/instruction/33802194 @@ -0,0 +1,4939 @@ +instruction: 0.693 +mistranslation: 0.687 +semantic: 0.656 +other: 0.637 + +[BUG] cxl can not create region + +Hi list + +I want to test cxl functions in arm64, and found some problems I can't +figure out. + +My test environment: + +1. build latest bios from +https://github.com/tianocore/edk2.git +master +branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) +2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git +master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm +support patch: +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +3. build Linux kernel from +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git +preview +branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) +4. build latest ndctl tools from +https://github.com/pmem/ndctl +create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) + +And my qemu test commands: +sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ + -cpu max -smp 8 -nographic -no-reboot \ + -kernel $KERNEL -bios $BIOS_BIN \ + -drive if=none,file=$ROOTFS,format=qcow2,id=hd \ + -device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 +nokaslr dyndbg="module cxl* +p"' \ + -object memory-backend-ram,size=4G,id=mem0 \ + -numa node,nodeid=0,cpus=0-7,memdev=mem0 \ + -net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ + -object +memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M +\ + -object +memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M +\ + -object +memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M +\ + -object +memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M +\ + -object +memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M +\ + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ + -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ + -device cxl-upstream,bus=root_port0,id=us0 \ + -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ + -device +cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ + -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ + -device +cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ + -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ + -device +cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ + -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ + -device +cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ + -M +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k + +And I have got two problems. +1. When I want to create x1 region with command: "cxl create-region -d +decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer +reference. 
Crash log: + +[ 534.697324] cxl_region region0: config state: 0 +[ 534.697346] cxl_region region0: probe: -6 +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +for mem0:decoder3.0 @ 0 +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +0000:0e:00.0 for mem0:decoder3.0 @ 0 +[ 534.699405] Unable to handle kernel NULL pointer dereference at +virtual address 0000000000000000 +[ 534.701474] Mem abort info: +[ 534.701994] ESR = 0x0000000086000004 +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +[ 534.703616] SET = 0, FnV = 0 +[ 534.704174] EA = 0, S1PTW = 0 +[ 534.704803] FSC = 0x04: level 0 translation fault +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +[ 534.710301] Modules linked in: +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 534.719190] pc : 0x0 +[ 534.719928] lr : commit_store+0x118/0x2cc +[ 534.721007] sp : ffff80000aec3c30 +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: ffff0000c0c06b30 +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: ffff0000c0a29400 +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: ffff0000c0c06800 +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: 0000000000000000 +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffd41fe838 +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff0000c0906e80 +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff80000aec3bf0 +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c155a000 +[ 534.738878] Call trace: +[ 534.739368] 0x0 +[ 534.739713] dev_attr_store+0x1c/0x30 +[ 534.740186] sysfs_kf_write+0x48/0x58 +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +[ 534.741872] new_sync_write+0xdc/0x158 +[ 534.742706] vfs_write+0x1ac/0x2a8 +[ 534.743440] ksys_write+0x68/0xf0 +[ 534.744328] __arm64_sys_write+0x1c/0x28 +[ 534.745180] invoke_syscall+0x44/0xf0 +[ 534.745989] el0_svc_common+0x4c/0xfc +[ 534.746661] do_el0_svc+0x60/0xa8 +[ 534.747378] el0_svc+0x2c/0x78 +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +[ 534.748919] el0t_64_sync+0x18c/0x190 +[ 534.749629] Code: bad PC value +[ 534.750169] ---[ end trace 0000000000000000 ]--- + +2. When I want to create x4 region with command: "cxl create-region -d +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: + +cxl region: create_region: region0: failed to set target3 to mem3 +cxl region: cmd_create_region: created 0 regions + +And kernel log as below: +[ 60.536663] cxl_region region0: config state: 0 +[ 60.536675] cxl_region region0: probe: -6 +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +for mem0:decoder3.0 @ 0 +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +0000:0e:00.0 for mem0:decoder3.0 @ 0 +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at 1 + +I have tried to write sysfs node manually, got same errors. + +Hope I can get some helps here. + +Bob + +On Fri, 5 Aug 2022 10:20:23 +0800 +Bobo WL wrote: + +> +Hi list +> +> +I want to test cxl functions in arm64, and found some problems I can't +> +figure out. +Hi Bob, + +Glad to see people testing this code. + +> +> +My test environment: +> +> +1. build latest bios from +https://github.com/tianocore/edk2.git +master +> +branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) +> +2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git +> +master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm +> +support patch: +> +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +> +3. build Linux kernel from +> +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git +preview +> +branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) +> +4. 
build latest ndctl tools from +https://github.com/pmem/ndctl +> +create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) +> +> +And my qemu test commands: +> +sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ +> +-cpu max -smp 8 -nographic -no-reboot \ +> +-kernel $KERNEL -bios $BIOS_BIN \ +> +-drive if=none,file=$ROOTFS,format=qcow2,id=hd \ +> +-device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 +> +nokaslr dyndbg="module cxl* +p"' \ +> +-object memory-backend-ram,size=4G,id=mem0 \ +> +-numa node,nodeid=0,cpus=0-7,memdev=mem0 \ +> +-net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ +> +-object +> +memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M +> +\ +> +-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ +> +-device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ +Probably not related to your problem, but there is a disconnect in QEMU / +kernel assumptionsaround the presence of an HDM decoder when a HB only +has a single root port. Spec allows it to be provided or not as an +implementation choice. +Kernel assumes it isn't provide. Qemu assumes it is. + +The temporary solution is to throw in a second root port on the HB and not +connect anything to it. Longer term I may special case this so that the +particular +decoder defaults to pass through settings in QEMU if there is only one root +port. + +> +-device cxl-upstream,bus=root_port0,id=us0 \ +> +-device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ +> +-device +> +cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ +> +-device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ +> +-device +> +cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ +> +-device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ +> +-device +> +cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ +> +-device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ +> +-device +> +cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ +> +-M +> +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k +> +> +And I have got two problems. +> +1. When I want to create x1 region with command: "cxl create-region -d +> +decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer +> +reference. Crash log: +> +> +[ 534.697324] cxl_region region0: config state: 0 +> +[ 534.697346] cxl_region region0: probe: -6 +Seems odd this is up here. But maybe fine. 
+ +> +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +> +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 534.699405] Unable to handle kernel NULL pointer dereference at +> +virtual address 0000000000000000 +> +[ 534.701474] Mem abort info: +> +[ 534.701994] ESR = 0x0000000086000004 +> +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +> +[ 534.703616] SET = 0, FnV = 0 +> +[ 534.704174] EA = 0, S1PTW = 0 +> +[ 534.704803] FSC = 0x04: level 0 translation fault +> +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +> +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +> +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +> +[ 534.710301] Modules linked in: +> +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +> +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +> +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 534.719190] pc : 0x0 +> +[ 534.719928] lr : commit_store+0x118/0x2cc +> +[ 534.721007] sp : ffff80000aec3c30 +> +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: +> +ffff0000c0c06b30 +> +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: +> +ffff0000c0a29400 +> +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: +> +ffff0000c0c06800 +> +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: +> +0000000000000000 +> +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: +> +0000ffffd41fe838 +> +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: +> +0000000000000000 +> +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : +> +0000000000000000 +> +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : +> +ffff0000c0906e80 +> +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : +> +ffff80000aec3bf0 +> +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : +> +ffff0000c155a000 +> +[ 534.738878] Call trace: +> +[ 534.739368] 0x0 +> +[ 534.739713] dev_attr_store+0x1c/0x30 +> +[ 534.740186] sysfs_kf_write+0x48/0x58 +> +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +> +[ 534.741872] new_sync_write+0xdc/0x158 +> +[ 534.742706] vfs_write+0x1ac/0x2a8 +> +[ 534.743440] ksys_write+0x68/0xf0 +> +[ 534.744328] __arm64_sys_write+0x1c/0x28 +> +[ 534.745180] invoke_syscall+0x44/0xf0 +> +[ 534.745989] el0_svc_common+0x4c/0xfc +> +[ 534.746661] do_el0_svc+0x60/0xa8 +> +[ 534.747378] el0_svc+0x2c/0x78 +> +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +> +[ 534.748919] el0t_64_sync+0x18c/0x190 +> +[ 534.749629] Code: bad PC value +> +[ 534.750169] ---[ end trace 0000000000000000 ]--- +> +> +2. When I want to create x4 region with command: "cxl create-region -d +> +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". 
I got below errors: +> +> +cxl region: create_region: region0: failed to set target3 to mem3 +> +cxl region: cmd_create_region: created 0 regions +> +> +And kernel log as below: +> +[ 60.536663] cxl_region region0: config state: 0 +> +[ 60.536675] cxl_region region0: probe: -6 +> +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +> +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +> +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +> +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +> +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +> +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +> +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +> +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +> +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +> +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +This looks like off by 1 that should be fixed in the below mentioned +cxl/pending branch. That ig should be 256. Note the fix was +for a test case with a fat HB and no switch, but certainly looks +like this is the same issue. + +> +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at +> +1 +> +> +I have tried to write sysfs node manually, got same errors. +When stepping through by hand, which sysfs write triggers the crash above? + +Not sure it's related, but I've just sent out a fix to the +target register handling in QEMU. +20220808122051.14822-1-Jonathan.Cameron@huawei.com +/T/#m47ff985412ce44559e6b04d677c302f8cd371330">https://lore.kernel.org/linux-cxl/ +20220808122051.14822-1-Jonathan.Cameron@huawei.com +/T/#m47ff985412ce44559e6b04d677c302f8cd371330 +I did have one instance last week of triggering what looked to be a race +condition but +the stack trace doesn't looks related to what you've hit. + +It will probably be a few days before I have time to take a look at replicating +what you have seen. + +If you have time, try using the kernel.org cxl/pending branch as there are +a few additional fixes on there since you sent this email. Optimistic to hope +this is covered by one of those, but at least it will mean we are trying to +replicate +on same branch. + +Jonathan + + +> +> +Hope I can get some helps here. 
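For anyone reproducing this by hand, the "write sysfs node manually" flow being discussed is roughly the sequence sketched below. Decoder and region names match this thread's topology, and the attribute names and ordering are my reading of the sysfs region ABI of that time, so treat it as illustrative rather than authoritative. Noting which individual write fails, as Jonathan asks above, narrows down whether target programming or the final commit is the step that goes wrong.

cd /sys/bus/cxl/devices/decoder0.0
region=$(cat create_pmem_region)        # e.g. "region0"
echo "$region" > create_pmem_region     # instantiate the region device
cd /sys/bus/cxl/devices/"$region"
echo "$(uuidgen)" > uuid                # pmem regions carry a uuid
echo 4096 > interleave_granularity
echo 4 > interleave_ways                # 1 for the x1 case
echo $((4 * 256 << 20)) > size          # four 256M devices in this setup
echo decoder3.0 > target0               # one endpoint decoder per way
echo decoder4.0 > target1
echo decoder5.0 > target2
echo decoder6.0 > target3
echo 1 > commit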
+> +> +Bob + +Hi Jonathan + +Thanks for your reply! + +On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron + wrote: +> +> +Probably not related to your problem, but there is a disconnect in QEMU / +> +kernel assumptionsaround the presence of an HDM decoder when a HB only +> +has a single root port. Spec allows it to be provided or not as an +> +implementation choice. +> +Kernel assumes it isn't provide. Qemu assumes it is. +> +> +The temporary solution is to throw in a second root port on the HB and not +> +connect anything to it. Longer term I may special case this so that the +> +particular +> +decoder defaults to pass through settings in QEMU if there is only one root +> +port. +> +You are right! After adding an extra HB in qemu, I can create a x1 +region successfully. +But have some errors in Nvdimm: + +[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node 0 +[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node 0 +[ 74.927470] nd_region region0: nmem0: is disabled, failing probe + +And x4 region still failed with same errors, using latest cxl/preview +branch don't work. +I have picked "Two CXL emulation fixes" patches in qemu, still not working. + +Bob + +On Tue, 9 Aug 2022 21:07:06 +0800 +Bobo WL wrote: + +> +Hi Jonathan +> +> +Thanks for your reply! +> +> +On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> + wrote: +> +> +> +> Probably not related to your problem, but there is a disconnect in QEMU / +> +> kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> has a single root port. Spec allows it to be provided or not as an +> +> implementation choice. +> +> Kernel assumes it isn't provide. Qemu assumes it is. +> +> +> +> The temporary solution is to throw in a second root port on the HB and not +> +> connect anything to it. Longer term I may special case this so that the +> +> particular +> +> decoder defaults to pass through settings in QEMU if there is only one root +> +> port. +> +> +> +> +You are right! After adding an extra HB in qemu, I can create a x1 +> +region successfully. +> +But have some errors in Nvdimm: +> +> +[ 74.925838] Unknown online node for memory at 0x10000000000, assuming node > 0 +> +[ 74.925846] Unknown target node for memory at 0x10000000000, assuming node > 0 +> +[ 74.927470] nd_region region0: nmem0: is disabled, failing probe +Ah. I've seen this one, but not chased it down yet. Was on my todo list to +chase +down. Once I reach this state I can verify the HDM Decode is correct which is +what +I've been using to test (Which wasn't true until earlier this week). +I'm currently testing via devmem, more for historical reasons than because it +makes +that much sense anymore. + +> +> +And x4 region still failed with same errors, using latest cxl/preview +> +branch don't work. +> +I have picked "Two CXL emulation fixes" patches in qemu, still not working. +> +> +Bob + +On Tue, 9 Aug 2022 17:08:25 +0100 +Jonathan Cameron wrote: + +> +On Tue, 9 Aug 2022 21:07:06 +0800 +> +Bobo WL wrote: +> +> +> Hi Jonathan +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> wrote: +> +> > +> +> > Probably not related to your problem, but there is a disconnect in QEMU / +> +> > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > has a single root port. Spec allows it to be provided or not as an +> +> > implementation choice. +> +> > Kernel assumes it isn't provide. Qemu assumes it is. 
+> +> > +> +> > The temporary solution is to throw in a second root port on the HB and not +> +> > connect anything to it. Longer term I may special case this so that the +> +> > particular +> +> > decoder defaults to pass through settings in QEMU if there is only one +> +> > root port. +> +> > +> +> +> +> You are right! After adding an extra HB in qemu, I can create a x1 +> +> region successfully. +> +> But have some errors in Nvdimm: +> +> +> +> [ 74.925838] Unknown online node for memory at 0x10000000000, assuming +> +> node 0 +> +> [ 74.925846] Unknown target node for memory at 0x10000000000, assuming +> +> node 0 +> +> [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> +Ah. I've seen this one, but not chased it down yet. Was on my todo list to +> +chase +> +down. Once I reach this state I can verify the HDM Decode is correct which is +> +what +> +I've been using to test (Which wasn't true until earlier this week). +> +I'm currently testing via devmem, more for historical reasons than because it +> +makes +> +that much sense anymore. +*embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +I'd forgotten that was still on the todo list. I don't think it will +be particularly hard to do and will take a look in next few days. + +Very very indirectly this error is causing a driver probe fail that means that +we hit a code path that has a rather odd looking check on NDD_LABELING. +Should not have gotten near that path though - hence the problem is actually +when we call cxl_pmem_get_config_data() and it returns an error because +we haven't fully connected up the command in QEMU. + +Jonathan + + +> +> +> +> +> And x4 region still failed with same errors, using latest cxl/preview +> +> branch don't work. +> +> I have picked "Two CXL emulation fixes" patches in qemu, still not working. +> +> +> +> Bob + +On Thu, 11 Aug 2022 18:08:57 +0100 +Jonathan Cameron via wrote: + +> +On Tue, 9 Aug 2022 17:08:25 +0100 +> +Jonathan Cameron wrote: +> +> +> On Tue, 9 Aug 2022 21:07:06 +0800 +> +> Bobo WL wrote: +> +> +> +> > Hi Jonathan +> +> > +> +> > Thanks for your reply! +> +> > +> +> > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > wrote: +> +> > > +> +> > > Probably not related to your problem, but there is a disconnect in QEMU +> +> > > / +> +> > > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > > has a single root port. Spec allows it to be provided or not as an +> +> > > implementation choice. +> +> > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > +> +> > > The temporary solution is to throw in a second root port on the HB and +> +> > > not +> +> > > connect anything to it. Longer term I may special case this so that +> +> > > the particular +> +> > > decoder defaults to pass through settings in QEMU if there is only one +> +> > > root port. +> +> > > +> +> > +> +> > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > region successfully. +> +> > But have some errors in Nvdimm: +> +> > +> +> > [ 74.925838] Unknown online node for memory at 0x10000000000, assuming +> +> > node 0 +> +> > [ 74.925846] Unknown target node for memory at 0x10000000000, assuming +> +> > node 0 +> +> > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> +> +> Ah. I've seen this one, but not chased it down yet. Was on my todo list to +> +> chase +> +> down. 
Once I reach this state I can verify the HDM Decode is correct which +> +> is what +> +> I've been using to test (Which wasn't true until earlier this week). +> +> I'm currently testing via devmem, more for historical reasons than because +> +> it makes +> +> that much sense anymore. +> +> +*embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +I'd forgotten that was still on the todo list. I don't think it will +> +be particularly hard to do and will take a look in next few days. +> +> +Very very indirectly this error is causing a driver probe fail that means that +> +we hit a code path that has a rather odd looking check on NDD_LABELING. +> +Should not have gotten near that path though - hence the problem is actually +> +when we call cxl_pmem_get_config_data() and it returns an error because +> +we haven't fully connected up the command in QEMU. +So a least one bug in QEMU. We were not supporting variable length payloads on +mailbox +inputs (but were on outputs). That hasn't mattered until we get to LSA writes. +We just need to relax condition on the supplied length. + +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +index c352a935c4..fdda9529fe 100644 +--- a/hw/cxl/cxl-mailbox-utils.c ++++ b/hw/cxl/cxl-mailbox-utils.c +@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) + cxl_cmd = &cxl_cmd_set[set][cmd]; + h = cxl_cmd->handler; + if (h) { +- if (len == cxl_cmd->in) { ++ if (len == cxl_cmd->in || !cxl_cmd->in) { + cxl_cmd->payload = cxl_dstate->mbox_reg_state + + A_CXL_DEV_CMD_PAYLOAD; + ret = (*h)(cxl_cmd, cxl_dstate, &len); + + +This lets the nvdimm/region probe fine, but I'm getting some issues with +namespace capacity so I'll look at what is causing that next. +Unfortunately I'm not that familiar with the driver/nvdimm side of things +so it's take a while to figure out what kicks off what! + +Jonathan + +> +> +Jonathan +> +> +> +> +> +> > +> +> > And x4 region still failed with same errors, using latest cxl/preview +> +> > branch don't work. +> +> > I have picked "Two CXL emulation fixes" patches in qemu, still not +> +> > working. +> +> > +> +> > Bob +> +> + +Jonathan Cameron wrote: +> +On Thu, 11 Aug 2022 18:08:57 +0100 +> +Jonathan Cameron via wrote: +> +> +> On Tue, 9 Aug 2022 17:08:25 +0100 +> +> Jonathan Cameron wrote: +> +> +> +> > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > Bobo WL wrote: +> +> > +> +> > > Hi Jonathan +> +> > > +> +> > > Thanks for your reply! +> +> > > +> +> > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > wrote: +> +> > > > +> +> > > > Probably not related to your problem, but there is a disconnect in +> +> > > > QEMU / +> +> > > > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > implementation choice. +> +> > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > +> +> > > > The temporary solution is to throw in a second root port on the HB +> +> > > > and not +> +> > > > connect anything to it. Longer term I may special case this so that +> +> > > > the particular +> +> > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > one root port. +> +> > > > +> +> > > +> +> > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > region successfully. 
+> +> > > But have some errors in Nvdimm: +> +> > > +> +> > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > +> +> > Ah. I've seen this one, but not chased it down yet. Was on my todo list +> +> > to chase +> +> > down. Once I reach this state I can verify the HDM Decode is correct +> +> > which is what +> +> > I've been using to test (Which wasn't true until earlier this week). +> +> > I'm currently testing via devmem, more for historical reasons than +> +> > because it makes +> +> > that much sense anymore. +> +> +> +> *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> I'd forgotten that was still on the todo list. I don't think it will +> +> be particularly hard to do and will take a look in next few days. +> +> +> +> Very very indirectly this error is causing a driver probe fail that means +> +> that +> +> we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> Should not have gotten near that path though - hence the problem is actually +> +> when we call cxl_pmem_get_config_data() and it returns an error because +> +> we haven't fully connected up the command in QEMU. +> +> +So a least one bug in QEMU. We were not supporting variable length payloads +> +on mailbox +> +inputs (but were on outputs). That hasn't mattered until we get to LSA +> +writes. +> +We just need to relax condition on the supplied length. +> +> +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +index c352a935c4..fdda9529fe 100644 +> +--- a/hw/cxl/cxl-mailbox-utils.c +> ++++ b/hw/cxl/cxl-mailbox-utils.c +> +@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +cxl_cmd = &cxl_cmd_set[set][cmd]; +> +h = cxl_cmd->handler; +> +if (h) { +> +- if (len == cxl_cmd->in) { +> ++ if (len == cxl_cmd->in || !cxl_cmd->in) { +> +cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +A_CXL_DEV_CMD_PAYLOAD; +> +ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> +> +This lets the nvdimm/region probe fine, but I'm getting some issues with +> +namespace capacity so I'll look at what is causing that next. +> +Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +so it's take a while to figure out what kicks off what! +The whirlwind tour is that 'struct nd_region' instances that represent a +persitent memory address range are composed of one more mappings of +'struct nvdimm' objects. The nvdimm object is driven by the dimm driver +in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking +the dimm (if locked) and interrogating the label area to look for +namespace labels. + +The label command calls are routed to the '->ndctl()' callback that was +registered when the CXL nvdimm_bus_descriptor was created. That callback +handles both 'bus' scope calls, currently none for CXL, and per nvdimm +calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +to CXL commands. + +The 'struct nvdimm' objects that the CXL side registers have the +NDD_LABELING flag set which means that namespaces need to be explicitly +created / provisioned from region capacity. Otherwise, if +drivers/nvdimm/dimm.c does not find a namespace-label-index block then +the region reverts to label-less mode and a default namespace equal to +the size of the region is instantiated. 
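For readers following the thread, the practical consequence of NDD_LABELING is that such a region does not surface a default namespace; a namespace-label index has to exist and namespaces have to be created explicitly. A rough ndctl sequence is sketched below. The device names match this thread's topology, and whether init-labels is actually needed on a freshly zeroed CXL LSA is my assumption, not something stated in the thread.

ndctl disable-region region0       # label area can only be rewritten while the region is offline
ndctl init-labels nmem0            # write an empty namespace-label index block
ndctl enable-region region0
ndctl create-namespace -r region0  # namespaces are now carved out explicitly

Without a valid index block, the label-less default described above is what the region falls back to.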
+ +If you are seeing small mismatches in namespace capacity then it may +just be the fact that by default 'ndctl create-namespace' results in an +'fsdax' mode namespace which just means that it is a block device where +1.5% of the capacity is reserved for 'struct page' metadata. You should +be able to see namespace capacity == region capacity by doing "ndctl +create-namespace -m raw", and disable DAX operation. + +Hope that helps. + +On Fri, 12 Aug 2022 09:03:02 -0700 +Dan Williams wrote: + +> +Jonathan Cameron wrote: +> +> On Thu, 11 Aug 2022 18:08:57 +0100 +> +> Jonathan Cameron via wrote: +> +> +> +> > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > Jonathan Cameron wrote: +> +> > +> +> > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > Bobo WL wrote: +> +> > > +> +> > > > Hi Jonathan +> +> > > > +> +> > > > Thanks for your reply! +> +> > > > +> +> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > wrote: +> +> > > > > +> +> > > > > Probably not related to your problem, but there is a disconnect in +> +> > > > > QEMU / +> +> > > > > kernel assumptionsaround the presence of an HDM decoder when a HB +> +> > > > > only +> +> > > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > > implementation choice. +> +> > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > +> +> > > > > The temporary solution is to throw in a second root port on the HB +> +> > > > > and not +> +> > > > > connect anything to it. Longer term I may special case this so +> +> > > > > that the particular +> +> > > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > > one root port. +> +> > > > > +> +> > > > +> +> > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > region successfully. +> +> > > > But have some errors in Nvdimm: +> +> > > > +> +> > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > > +> +> > > +> +> > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > list to chase +> +> > > down. Once I reach this state I can verify the HDM Decode is correct +> +> > > which is what +> +> > > I've been using to test (Which wasn't true until earlier this week). +> +> > > I'm currently testing via devmem, more for historical reasons than +> +> > > because it makes +> +> > > that much sense anymore. +> +> > +> +> > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > I'd forgotten that was still on the todo list. I don't think it will +> +> > be particularly hard to do and will take a look in next few days. +> +> > +> +> > Very very indirectly this error is causing a driver probe fail that means +> +> > that +> +> > we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> > Should not have gotten near that path though - hence the problem is +> +> > actually +> +> > when we call cxl_pmem_get_config_data() and it returns an error because +> +> > we haven't fully connected up the command in QEMU. +> +> +> +> So a least one bug in QEMU. We were not supporting variable length payloads +> +> on mailbox +> +> inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> writes. +> +> We just need to relax condition on the supplied length. 
+> +> +> +> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> index c352a935c4..fdda9529fe 100644 +> +> --- a/hw/cxl/cxl-mailbox-utils.c +> +> +++ b/hw/cxl/cxl-mailbox-utils.c +> +> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> h = cxl_cmd->handler; +> +> if (h) { +> +> - if (len == cxl_cmd->in) { +> +> + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +> cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +> A_CXL_DEV_CMD_PAYLOAD; +> +> ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> +> +> +> +> This lets the nvdimm/region probe fine, but I'm getting some issues with +> +> namespace capacity so I'll look at what is causing that next. +> +> Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +> so it's take a while to figure out what kicks off what! +> +> +The whirlwind tour is that 'struct nd_region' instances that represent a +> +persitent memory address range are composed of one more mappings of +> +'struct nvdimm' objects. The nvdimm object is driven by the dimm driver +> +in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking +> +the dimm (if locked) and interrogating the label area to look for +> +namespace labels. +> +> +The label command calls are routed to the '->ndctl()' callback that was +> +registered when the CXL nvdimm_bus_descriptor was created. That callback +> +handles both 'bus' scope calls, currently none for CXL, and per nvdimm +> +calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +> +to CXL commands. +> +> +The 'struct nvdimm' objects that the CXL side registers have the +> +NDD_LABELING flag set which means that namespaces need to be explicitly +> +created / provisioned from region capacity. Otherwise, if +> +drivers/nvdimm/dimm.c does not find a namespace-label-index block then +> +the region reverts to label-less mode and a default namespace equal to +> +the size of the region is instantiated. +> +> +If you are seeing small mismatches in namespace capacity then it may +> +just be the fact that by default 'ndctl create-namespace' results in an +> +'fsdax' mode namespace which just means that it is a block device where +> +1.5% of the capacity is reserved for 'struct page' metadata. You should +> +be able to see namespace capacity == region capacity by doing "ndctl +> +create-namespace -m raw", and disable DAX operation. +Currently ndctl create-namespace crashes qemu ;) +Which isn't ideal! + +> +> +Hope that helps. +Got me looking at the right code. Thanks! + +Jonathan + +On Fri, 12 Aug 2022 17:15:09 +0100 +Jonathan Cameron wrote: + +> +On Fri, 12 Aug 2022 09:03:02 -0700 +> +Dan Williams wrote: +> +> +> Jonathan Cameron wrote: +> +> > On Thu, 11 Aug 2022 18:08:57 +0100 +> +> > Jonathan Cameron via wrote: +> +> > +> +> > > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > > Jonathan Cameron wrote: +> +> > > +> +> > > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > > Bobo WL wrote: +> +> > > > +> +> > > > > Hi Jonathan +> +> > > > > +> +> > > > > Thanks for your reply! +> +> > > > > +> +> > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > > wrote: +> +> > > > > > +> +> > > > > > Probably not related to your problem, but there is a disconnect +> +> > > > > > in QEMU / +> +> > > > > > kernel assumptionsaround the presence of an HDM decoder when a HB +> +> > > > > > only +> +> > > > > > has a single root port. Spec allows it to be provided or not as +> +> > > > > > an implementation choice. 
+> +> > > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > > +> +> > > > > > The temporary solution is to throw in a second root port on the +> +> > > > > > HB and not +> +> > > > > > connect anything to it. Longer term I may special case this so +> +> > > > > > that the particular +> +> > > > > > decoder defaults to pass through settings in QEMU if there is +> +> > > > > > only one root port. +> +> > > > > > +> +> > > > > +> +> > > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > > region successfully. +> +> > > > > But have some errors in Nvdimm: +> +> > > > > +> +> > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > > assuming node 0 +> +> > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > > assuming node 0 +> +> > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > > > +> +> > > > +> +> > > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > > list to chase +> +> > > > down. Once I reach this state I can verify the HDM Decode is correct +> +> > > > which is what +> +> > > > I've been using to test (Which wasn't true until earlier this week). +> +> > > > I'm currently testing via devmem, more for historical reasons than +> +> > > > because it makes +> +> > > > that much sense anymore. +> +> > > +> +> > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > > I'd forgotten that was still on the todo list. I don't think it will +> +> > > be particularly hard to do and will take a look in next few days. +> +> > > +> +> > > Very very indirectly this error is causing a driver probe fail that +> +> > > means that +> +> > > we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> > > Should not have gotten near that path though - hence the problem is +> +> > > actually +> +> > > when we call cxl_pmem_get_config_data() and it returns an error because +> +> > > we haven't fully connected up the command in QEMU. +> +> > +> +> > So a least one bug in QEMU. We were not supporting variable length +> +> > payloads on mailbox +> +> > inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> > writes. +> +> > We just need to relax condition on the supplied length. +> +> > +> +> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> > index c352a935c4..fdda9529fe 100644 +> +> > --- a/hw/cxl/cxl-mailbox-utils.c +> +> > +++ b/hw/cxl/cxl-mailbox-utils.c +> +> > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> > cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> > h = cxl_cmd->handler; +> +> > if (h) { +> +> > - if (len == cxl_cmd->in) { +> +> > + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +> > cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +> > A_CXL_DEV_CMD_PAYLOAD; +> +> > ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> > +> +> > +> +> > This lets the nvdimm/region probe fine, but I'm getting some issues with +> +> > namespace capacity so I'll look at what is causing that next. +> +> > Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +> > so it's take a while to figure out what kicks off what! +> +> +> +> The whirlwind tour is that 'struct nd_region' instances that represent a +> +> persitent memory address range are composed of one more mappings of +> +> 'struct nvdimm' objects. The nvdimm object is driven by the dimm driver +> +> in drivers/nvdimm/dimm.c. 
That driver is mainly charged with unlocking +> +> the dimm (if locked) and interrogating the label area to look for +> +> namespace labels. +> +> +> +> The label command calls are routed to the '->ndctl()' callback that was +> +> registered when the CXL nvdimm_bus_descriptor was created. That callback +> +> handles both 'bus' scope calls, currently none for CXL, and per nvdimm +> +> calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +> +> to CXL commands. +> +> +> +> The 'struct nvdimm' objects that the CXL side registers have the +> +> NDD_LABELING flag set which means that namespaces need to be explicitly +> +> created / provisioned from region capacity. Otherwise, if +> +> drivers/nvdimm/dimm.c does not find a namespace-label-index block then +> +> the region reverts to label-less mode and a default namespace equal to +> +> the size of the region is instantiated. +> +> +> +> If you are seeing small mismatches in namespace capacity then it may +> +> just be the fact that by default 'ndctl create-namespace' results in an +> +> 'fsdax' mode namespace which just means that it is a block device where +> +> 1.5% of the capacity is reserved for 'struct page' metadata. You should +> +> be able to see namespace capacity == region capacity by doing "ndctl +> +> create-namespace -m raw", and disable DAX operation. +> +> +Currently ndctl create-namespace crashes qemu ;) +> +Which isn't ideal! +> +Found a cause for this one. Mailbox payload may be as small as 256 bytes. +We have code in kernel sanity checking that output payload fits in the +mailbox, but nothing on the input payload. Symptom is that we write just +off the end whatever size the payload is. Note doing this shouldn't crash +qemu - so I need to fix a range check somewhere. + +I think this is because cxl_pmem_get_config_size() returns the mailbox +payload size as being the available LSA size, forgetting to remove the +size of the headers on the set_lsa side of things. +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110 +I've hacked the max_payload to be -8 + +Now we still don't succeed in creating the namespace, but bonus is it doesn't +crash any more. + + +Jonathan + + + +> +> +> +> Hope that helps. +> +Got me looking at the right code. Thanks! +> +> +Jonathan +> +> + +On Mon, 15 Aug 2022 15:18:09 +0100 +Jonathan Cameron via wrote: + +> +On Fri, 12 Aug 2022 17:15:09 +0100 +> +Jonathan Cameron wrote: +> +> +> On Fri, 12 Aug 2022 09:03:02 -0700 +> +> Dan Williams wrote: +> +> +> +> > Jonathan Cameron wrote: +> +> > > On Thu, 11 Aug 2022 18:08:57 +0100 +> +> > > Jonathan Cameron via wrote: +> +> > > +> +> > > > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > > > Jonathan Cameron wrote: +> +> > > > +> +> > > > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > > > Bobo WL wrote: +> +> > > > > +> +> > > > > > Hi Jonathan +> +> > > > > > +> +> > > > > > Thanks for your reply! +> +> > > > > > +> +> > > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > > > wrote: +> +> > > > > > > +> +> > > > > > > Probably not related to your problem, but there is a disconnect +> +> > > > > > > in QEMU / +> +> > > > > > > kernel assumptionsaround the presence of an HDM decoder when a +> +> > > > > > > HB only +> +> > > > > > > has a single root port. Spec allows it to be provided or not as +> +> > > > > > > an implementation choice. +> +> > > > > > > Kernel assumes it isn't provide. Qemu assumes it is. 
+> +> > > > > > > +> +> > > > > > > The temporary solution is to throw in a second root port on the +> +> > > > > > > HB and not +> +> > > > > > > connect anything to it. Longer term I may special case this so +> +> > > > > > > that the particular +> +> > > > > > > decoder defaults to pass through settings in QEMU if there is +> +> > > > > > > only one root port. +> +> > > > > > > +> +> > > > > > +> +> > > > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > > > region successfully. +> +> > > > > > But have some errors in Nvdimm: +> +> > > > > > +> +> > > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > > > assuming node 0 +> +> > > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > > > assuming node 0 +> +> > > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing +> +> > > > > > probe +> +> > > > > +> +> > > > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > > > list to chase +> +> > > > > down. Once I reach this state I can verify the HDM Decode is +> +> > > > > correct which is what +> +> > > > > I've been using to test (Which wasn't true until earlier this +> +> > > > > week). +> +> > > > > I'm currently testing via devmem, more for historical reasons than +> +> > > > > because it makes +> +> > > > > that much sense anymore. +> +> > > > +> +> > > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > > > I'd forgotten that was still on the todo list. I don't think it will +> +> > > > be particularly hard to do and will take a look in next few days. +> +> > > > +> +> > > > Very very indirectly this error is causing a driver probe fail that +> +> > > > means that +> +> > > > we hit a code path that has a rather odd looking check on +> +> > > > NDD_LABELING. +> +> > > > Should not have gotten near that path though - hence the problem is +> +> > > > actually +> +> > > > when we call cxl_pmem_get_config_data() and it returns an error +> +> > > > because +> +> > > > we haven't fully connected up the command in QEMU. +> +> > > +> +> > > So a least one bug in QEMU. We were not supporting variable length +> +> > > payloads on mailbox +> +> > > inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> > > writes. +> +> > > We just need to relax condition on the supplied length. +> +> > > +> +> > > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> > > index c352a935c4..fdda9529fe 100644 +> +> > > --- a/hw/cxl/cxl-mailbox-utils.c +> +> > > +++ b/hw/cxl/cxl-mailbox-utils.c +> +> > > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> > > cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> > > h = cxl_cmd->handler; +> +> > > if (h) { +> +> > > - if (len == cxl_cmd->in) { +> +> > > + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +> > > cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +> > > A_CXL_DEV_CMD_PAYLOAD; +> +> > > ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> > > +> +> > > +> +> > > This lets the nvdimm/region probe fine, but I'm getting some issues with +> +> > > namespace capacity so I'll look at what is causing that next. +> +> > > Unfortunately I'm not that familiar with the driver/nvdimm side of +> +> > > things +> +> > > so it's take a while to figure out what kicks off what! +> +> > +> +> > The whirlwind tour is that 'struct nd_region' instances that represent a +> +> > persitent memory address range are composed of one more mappings of +> +> > 'struct nvdimm' objects. 
The nvdimm object is driven by the dimm driver +> +> > in drivers/nvdimm/dimm.c. That driver is mainly charged with unlocking +> +> > the dimm (if locked) and interrogating the label area to look for +> +> > namespace labels. +> +> > +> +> > The label command calls are routed to the '->ndctl()' callback that was +> +> > registered when the CXL nvdimm_bus_descriptor was created. That callback +> +> > handles both 'bus' scope calls, currently none for CXL, and per nvdimm +> +> > calls. cxl_pmem_nvdimm_ctl() translates those generic LIBNVDIMM commands +> +> > to CXL commands. +> +> > +> +> > The 'struct nvdimm' objects that the CXL side registers have the +> +> > NDD_LABELING flag set which means that namespaces need to be explicitly +> +> > created / provisioned from region capacity. Otherwise, if +> +> > drivers/nvdimm/dimm.c does not find a namespace-label-index block then +> +> > the region reverts to label-less mode and a default namespace equal to +> +> > the size of the region is instantiated. +> +> > +> +> > If you are seeing small mismatches in namespace capacity then it may +> +> > just be the fact that by default 'ndctl create-namespace' results in an +> +> > 'fsdax' mode namespace which just means that it is a block device where +> +> > 1.5% of the capacity is reserved for 'struct page' metadata. You should +> +> > be able to see namespace capacity == region capacity by doing "ndctl +> +> > create-namespace -m raw", and disable DAX operation. +> +> +> +> Currently ndctl create-namespace crashes qemu ;) +> +> Which isn't ideal! +> +> +> +> +Found a cause for this one. Mailbox payload may be as small as 256 bytes. +> +We have code in kernel sanity checking that output payload fits in the +> +mailbox, but nothing on the input payload. Symptom is that we write just +> +off the end whatever size the payload is. Note doing this shouldn't crash +> +qemu - so I need to fix a range check somewhere. +> +> +I think this is because cxl_pmem_get_config_size() returns the mailbox +> +payload size as being the available LSA size, forgetting to remove the +> +size of the headers on the set_lsa side of things. +> +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/pmem.c?h=next#n110 +> +> +I've hacked the max_payload to be -8 +> +> +Now we still don't succeed in creating the namespace, but bonus is it doesn't +> +crash any more. +In the interests of defensive / correct handling from QEMU I took a +look into why it was crashing. Turns out that providing a NULL write callback +for +the memory device region (that the above overlarge write was spilling into) +isn't +a safe thing to do. Needs a stub. Oops. + +On plus side we might never have noticed this was going wrong without the crash +*silver lining in every cloud* + +Fix to follow... + +Jonathan + + +> +> +> +Jonathan +> +> +> +> +> > +> +> > Hope that helps. +> +> Got me looking at the right code. Thanks! +> +> +> +> Jonathan +> +> +> +> +> +> + +On Mon, 15 Aug 2022 at 15:55, Jonathan Cameron via wrote: +> +In the interests of defensive / correct handling from QEMU I took a +> +look into why it was crashing. Turns out that providing a NULL write +> +callback for +> +the memory device region (that the above overlarge write was spilling into) +> +isn't +> +a safe thing to do. Needs a stub. Oops. +Yeah. We've talked before about adding an assert so that that kind of +"missing function" bug is caught at device creation rather than only +if the guest tries to access the device, but we never quite got around +to it... 
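To make the failure mode concrete: a MemoryRegionOps that fills in .read but leaves .write NULL means the first guest store dispatched to that region jumps through a null function pointer. The sketch below only illustrates the kind of stub Jonathan describes, assuming QEMU's MemoryRegionOps API; it is not the actual fix that was posted.

#include "qemu/osdep.h"
#include "exec/memory.h"

/* Illustrative stub (names are hypothetical): accept and discard guest
 * writes instead of leaving the .write callback NULL. */
static uint64_t cxl_md_stub_read(void *opaque, hwaddr addr, unsigned size)
{
    return 0;
}

static void cxl_md_stub_write(void *opaque, hwaddr addr, uint64_t data,
                              unsigned size)
{
    /* no-op: unsupported or out-of-range writes are silently dropped */
}

static const MemoryRegionOps cxl_md_stub_ops = {
    .read = cxl_md_stub_read,
    .write = cxl_md_stub_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

An assert at memory_region_init_io() time, as Peter suggests, would flag the missing callback at device creation rather than on first guest access.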
+ +-- PMM + +On Fri, 12 Aug 2022 16:44:03 +0100 +Jonathan Cameron wrote: + +> +On Thu, 11 Aug 2022 18:08:57 +0100 +> +Jonathan Cameron via wrote: +> +> +> On Tue, 9 Aug 2022 17:08:25 +0100 +> +> Jonathan Cameron wrote: +> +> +> +> > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > Bobo WL wrote: +> +> > +> +> > > Hi Jonathan +> +> > > +> +> > > Thanks for your reply! +> +> > > +> +> > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > wrote: +> +> > > > +> +> > > > Probably not related to your problem, but there is a disconnect in +> +> > > > QEMU / +> +> > > > kernel assumptionsaround the presence of an HDM decoder when a HB only +> +> > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > implementation choice. +> +> > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > +> +> > > > The temporary solution is to throw in a second root port on the HB +> +> > > > and not +> +> > > > connect anything to it. Longer term I may special case this so that +> +> > > > the particular +> +> > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > one root port. +> +> > > > +> +> > > +> +> > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > region successfully. +> +> > > But have some errors in Nvdimm: +> +> > > +> +> > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > assuming node 0 +> +> > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > +> +> > +> +> > Ah. I've seen this one, but not chased it down yet. Was on my todo list +> +> > to chase +> +> > down. Once I reach this state I can verify the HDM Decode is correct +> +> > which is what +> +> > I've been using to test (Which wasn't true until earlier this week). +> +> > I'm currently testing via devmem, more for historical reasons than +> +> > because it makes +> +> > that much sense anymore. +> +> +> +> *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> I'd forgotten that was still on the todo list. I don't think it will +> +> be particularly hard to do and will take a look in next few days. +> +> +> +> Very very indirectly this error is causing a driver probe fail that means +> +> that +> +> we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> Should not have gotten near that path though - hence the problem is actually +> +> when we call cxl_pmem_get_config_data() and it returns an error because +> +> we haven't fully connected up the command in QEMU. +> +> +So a least one bug in QEMU. We were not supporting variable length payloads +> +on mailbox +> +inputs (but were on outputs). That hasn't mattered until we get to LSA +> +writes. +> +We just need to relax condition on the supplied length. +> +> +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +index c352a935c4..fdda9529fe 100644 +> +--- a/hw/cxl/cxl-mailbox-utils.c +> ++++ b/hw/cxl/cxl-mailbox-utils.c +> +@@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +cxl_cmd = &cxl_cmd_set[set][cmd]; +> +h = cxl_cmd->handler; +> +if (h) { +> +- if (len == cxl_cmd->in) { +> ++ if (len == cxl_cmd->in || !cxl_cmd->in) { +Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. + +With that fixed we hit new fun paths - after some errors we get the +worrying - not totally sure but looks like a failure on an error cleanup. 
+I'll chase down the error source, but even then this is probably triggerable by +hardware problem or similar. Some bonus prints in here from me chasing +error paths, but it's otherwise just cxl/next + the fix I posted earlier today. + +[ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) +[ 69.920108] nd_region_probe +[ 69.920623] ------------[ cut here ]------------ +[ 69.920675] refcount_t: addition on 0; use-after-free. +[ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 +refcount_warn_saturate+0xa0/0x144 +[ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi +cxl_core +[ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399 +[ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 +[ 69.931482] Workqueue: events_unbound async_run_entry_fn +[ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 +[ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 +[ 69.936541] sp : ffff80000890b960 +[ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: 0000000000000000 +[ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: 0000000000000000 +[ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: ffff0000c5254800 +[ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: ffffffffffffffff +[ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: 0000000000000000 +[ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: 657466612d657375 +[ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : ffffa54a8f63d288 +[ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : 00000000fffff31e +[ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : ffff5ab66e5ef000 +root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : +0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 +[ 69.957098] Call trace: +[ 69.957959] refcount_warn_saturate+0xa0/0x144 +[ 69.958773] get_ndd+0x5c/0x80 +[ 69.959294] nd_region_register_namespaces+0xe4/0xe90 +[ 69.960253] nd_region_probe+0x100/0x290 +[ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 +[ 69.962087] really_probe+0x19c/0x3f0 +[ 69.962620] __driver_probe_device+0x11c/0x190 +[ 69.963258] driver_probe_device+0x44/0xf4 +[ 69.963773] __device_attach_driver+0xa4/0x140 +[ 69.964471] bus_for_each_drv+0x84/0xe0 +[ 69.965068] __device_attach+0xb0/0x1f0 +[ 69.966101] device_initial_probe+0x20/0x30 +[ 69.967142] bus_probe_device+0xa4/0xb0 +[ 69.968104] device_add+0x3e8/0x910 +[ 69.969111] nd_async_device_register+0x24/0x74 +[ 69.969928] async_run_entry_fn+0x40/0x150 +[ 69.970725] process_one_work+0x1dc/0x450 +[ 69.971796] worker_thread+0x154/0x450 +[ 69.972700] kthread+0x118/0x120 +[ 69.974141] ret_from_fork+0x10/0x20 +[ 69.975141] ---[ end trace 0000000000000000 ]--- +[ 70.117887] Into nd_namespace_pmem_set_resource() + +> +cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +A_CXL_DEV_CMD_PAYLOAD; +> +ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> +> +This lets the nvdimm/region probe fine, but I'm getting some issues with +> +namespace capacity so I'll look at what is causing that next. +> +Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +so it's take a while to figure out what kicks off what! +> +> +Jonathan +> +> +> +> +> Jonathan +> +> +> +> +> +> > +> +> > > +> +> > > And x4 region still failed with same errors, using latest cxl/preview +> +> > > branch don't work. 
+> +> > > I have picked "Two CXL emulation fixes" patches in qemu, still not +> +> > > working. +> +> > > +> +> > > Bob +> +> +> +> +> + +On Mon, 15 Aug 2022 18:04:44 +0100 +Jonathan Cameron wrote: + +> +On Fri, 12 Aug 2022 16:44:03 +0100 +> +Jonathan Cameron wrote: +> +> +> On Thu, 11 Aug 2022 18:08:57 +0100 +> +> Jonathan Cameron via wrote: +> +> +> +> > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > Jonathan Cameron wrote: +> +> > +> +> > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > Bobo WL wrote: +> +> > > +> +> > > > Hi Jonathan +> +> > > > +> +> > > > Thanks for your reply! +> +> > > > +> +> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > wrote: +> +> > > > > +> +> > > > > Probably not related to your problem, but there is a disconnect in +> +> > > > > QEMU / +> +> > > > > kernel assumptionsaround the presence of an HDM decoder when a HB +> +> > > > > only +> +> > > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > > implementation choice. +> +> > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > +> +> > > > > The temporary solution is to throw in a second root port on the HB +> +> > > > > and not +> +> > > > > connect anything to it. Longer term I may special case this so +> +> > > > > that the particular +> +> > > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > > one root port. +> +> > > > > +> +> > > > +> +> > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > region successfully. +> +> > > > But have some errors in Nvdimm: +> +> > > > +> +> > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > > +> +> > > +> +> > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > list to chase +> +> > > down. Once I reach this state I can verify the HDM Decode is correct +> +> > > which is what +> +> > > I've been using to test (Which wasn't true until earlier this week). +> +> > > I'm currently testing via devmem, more for historical reasons than +> +> > > because it makes +> +> > > that much sense anymore. +> +> > +> +> > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > I'd forgotten that was still on the todo list. I don't think it will +> +> > be particularly hard to do and will take a look in next few days. +> +> > +> +> > Very very indirectly this error is causing a driver probe fail that means +> +> > that +> +> > we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> > Should not have gotten near that path though - hence the problem is +> +> > actually +> +> > when we call cxl_pmem_get_config_data() and it returns an error because +> +> > we haven't fully connected up the command in QEMU. +> +> +> +> So a least one bug in QEMU. We were not supporting variable length payloads +> +> on mailbox +> +> inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> writes. +> +> We just need to relax condition on the supplied length. 
+> +> +> +> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> index c352a935c4..fdda9529fe 100644 +> +> --- a/hw/cxl/cxl-mailbox-utils.c +> +> +++ b/hw/cxl/cxl-mailbox-utils.c +> +> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> h = cxl_cmd->handler; +> +> if (h) { +> +> - if (len == cxl_cmd->in) { +> +> + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. +Cause of the error is a failure in GET_LSA. +Reason, payload length is wrong in QEMU but was hidden previously by my wrong +fix here. Probably still a good idea to inject an error in GET_LSA and chase +down the refcount issue. + + +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +index fdda9529fe..e8565fbd6e 100644 +--- a/hw/cxl/cxl-mailbox-utils.c ++++ b/hw/cxl/cxl-mailbox-utils.c +@@ -489,7 +489,7 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { + cmd_identify_memory_device, 0, 0 }, + [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO", + cmd_ccls_get_partition_info, 0, 0 }, +- [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 }, ++ [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, + [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, + ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, + [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST", +@@ -510,12 +510,13 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) + cxl_cmd = &cxl_cmd_set[set][cmd]; + h = cxl_cmd->handler; + if (h) { +- if (len == cxl_cmd->in || !cxl_cmd->in) { ++ if (len == cxl_cmd->in || cxl_cmd->in == ~0) { + cxl_cmd->payload = cxl_dstate->mbox_reg_state + + A_CXL_DEV_CMD_PAYLOAD; + +And woot, we get a namespace in the LSA :) + +I'll post QEMU fixes in next day or two. Kernel side now seems more or less +fine be it with suspicious refcount underflow. + +> +> +With that fixed we hit new fun paths - after some errors we get the +> +worrying - not totally sure but looks like a failure on an error cleanup. +> +I'll chase down the error source, but even then this is probably triggerable +> +by +> +hardware problem or similar. Some bonus prints in here from me chasing +> +error paths, but it's otherwise just cxl/next + the fix I posted earlier +> +today. +> +> +[ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) +> +[ 69.920108] nd_region_probe +> +[ 69.920623] ------------[ cut here ]------------ +> +[ 69.920675] refcount_t: addition on 0; use-after-free. 
+> +[ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 +> +refcount_warn_saturate+0xa0/0x144 +> +[ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi +> +cxl_core +> +[ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399 +> +[ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 69.931482] Workqueue: events_unbound async_run_entry_fn +> +[ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 +> +[ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 +> +[ 69.936541] sp : ffff80000890b960 +> +[ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: +> +0000000000000000 +> +[ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: +> +0000000000000000 +> +[ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: +> +ffff0000c5254800 +> +[ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: +> +ffffffffffffffff +> +[ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: +> +0000000000000000 +> +[ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: +> +657466612d657375 +> +[ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : +> +ffffa54a8f63d288 +> +[ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : +> +00000000fffff31e +> +[ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : +> +ffff5ab66e5ef000 +> +root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : +> +0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 +> +[ 69.957098] Call trace: +> +[ 69.957959] refcount_warn_saturate+0xa0/0x144 +> +[ 69.958773] get_ndd+0x5c/0x80 +> +[ 69.959294] nd_region_register_namespaces+0xe4/0xe90 +> +[ 69.960253] nd_region_probe+0x100/0x290 +> +[ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 +> +[ 69.962087] really_probe+0x19c/0x3f0 +> +[ 69.962620] __driver_probe_device+0x11c/0x190 +> +[ 69.963258] driver_probe_device+0x44/0xf4 +> +[ 69.963773] __device_attach_driver+0xa4/0x140 +> +[ 69.964471] bus_for_each_drv+0x84/0xe0 +> +[ 69.965068] __device_attach+0xb0/0x1f0 +> +[ 69.966101] device_initial_probe+0x20/0x30 +> +[ 69.967142] bus_probe_device+0xa4/0xb0 +> +[ 69.968104] device_add+0x3e8/0x910 +> +[ 69.969111] nd_async_device_register+0x24/0x74 +> +[ 69.969928] async_run_entry_fn+0x40/0x150 +> +[ 69.970725] process_one_work+0x1dc/0x450 +> +[ 69.971796] worker_thread+0x154/0x450 +> +[ 69.972700] kthread+0x118/0x120 +> +[ 69.974141] ret_from_fork+0x10/0x20 +> +[ 69.975141] ---[ end trace 0000000000000000 ]--- +> +[ 70.117887] Into nd_namespace_pmem_set_resource() +> +> +> cxl_cmd->payload = cxl_dstate->mbox_reg_state + +> +> A_CXL_DEV_CMD_PAYLOAD; +> +> ret = (*h)(cxl_cmd, cxl_dstate, &len); +> +> +> +> +> +> This lets the nvdimm/region probe fine, but I'm getting some issues with +> +> namespace capacity so I'll look at what is causing that next. +> +> Unfortunately I'm not that familiar with the driver/nvdimm side of things +> +> so it's take a while to figure out what kicks off what! +> +> +> +> Jonathan +> +> +> +> > +> +> > Jonathan +> +> > +> +> > +> +> > > +> +> > > > +> +> > > > And x4 region still failed with same errors, using latest cxl/preview +> +> > > > branch don't work. +> +> > > > I have picked "Two CXL emulation fixes" patches in qemu, still not +> +> > > > working. 
+> +> > > > +> +> > > > Bob +> +> > +> +> > +> +> +> + +Jonathan Cameron wrote: +> +On Fri, 12 Aug 2022 16:44:03 +0100 +> +Jonathan Cameron wrote: +> +> +> On Thu, 11 Aug 2022 18:08:57 +0100 +> +> Jonathan Cameron via wrote: +> +> +> +> > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > Jonathan Cameron wrote: +> +> > +> +> > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > Bobo WL wrote: +> +> > > +> +> > > > Hi Jonathan +> +> > > > +> +> > > > Thanks for your reply! +> +> > > > +> +> > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > wrote: +> +> > > > > +> +> > > > > Probably not related to your problem, but there is a disconnect in +> +> > > > > QEMU / +> +> > > > > kernel assumptionsaround the presence of an HDM decoder when a HB +> +> > > > > only +> +> > > > > has a single root port. Spec allows it to be provided or not as an +> +> > > > > implementation choice. +> +> > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > +> +> > > > > The temporary solution is to throw in a second root port on the HB +> +> > > > > and not +> +> > > > > connect anything to it. Longer term I may special case this so +> +> > > > > that the particular +> +> > > > > decoder defaults to pass through settings in QEMU if there is only +> +> > > > > one root port. +> +> > > > > +> +> > > > +> +> > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > region successfully. +> +> > > > But have some errors in Nvdimm: +> +> > > > +> +> > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > assuming node 0 +> +> > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > > +> +> > > +> +> > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > list to chase +> +> > > down. Once I reach this state I can verify the HDM Decode is correct +> +> > > which is what +> +> > > I've been using to test (Which wasn't true until earlier this week). +> +> > > I'm currently testing via devmem, more for historical reasons than +> +> > > because it makes +> +> > > that much sense anymore. +> +> > +> +> > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > I'd forgotten that was still on the todo list. I don't think it will +> +> > be particularly hard to do and will take a look in next few days. +> +> > +> +> > Very very indirectly this error is causing a driver probe fail that means +> +> > that +> +> > we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> > Should not have gotten near that path though - hence the problem is +> +> > actually +> +> > when we call cxl_pmem_get_config_data() and it returns an error because +> +> > we haven't fully connected up the command in QEMU. +> +> +> +> So a least one bug in QEMU. We were not supporting variable length payloads +> +> on mailbox +> +> inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> writes. +> +> We just need to relax condition on the supplied length. 
+> +> +> +> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> index c352a935c4..fdda9529fe 100644 +> +> --- a/hw/cxl/cxl-mailbox-utils.c +> +> +++ b/hw/cxl/cxl-mailbox-utils.c +> +> @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> h = cxl_cmd->handler; +> +> if (h) { +> +> - if (len == cxl_cmd->in) { +> +> + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. +> +> +With that fixed we hit new fun paths - after some errors we get the +> +worrying - not totally sure but looks like a failure on an error cleanup. +> +I'll chase down the error source, but even then this is probably triggerable +> +by +> +hardware problem or similar. Some bonus prints in here from me chasing +> +error paths, but it's otherwise just cxl/next + the fix I posted earlier +> +today. +One of the scenarios that I cannot rule out is nvdimm_probe() racing +nd_region_probe(), but given all the work it takes to create a region I +suspect all the nvdimm_probe() work to have completed... + +It is at least one potentially wrong hypothesis that needs to be chased +down. + +> +> +[ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) +> +[ 69.920108] nd_region_probe +> +[ 69.920623] ------------[ cut here ]------------ +> +[ 69.920675] refcount_t: addition on 0; use-after-free. +> +[ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 +> +refcount_warn_saturate+0xa0/0x144 +> +[ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port cxl_acpi +> +cxl_core +> +[ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ #399 +> +[ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 69.931482] Workqueue: events_unbound async_run_entry_fn +> +[ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 +> +[ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 +> +[ 69.936541] sp : ffff80000890b960 +> +[ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: +> +0000000000000000 +> +[ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: +> +0000000000000000 +> +[ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: +> +ffff0000c5254800 +> +[ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: +> +ffffffffffffffff +> +[ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: +> +0000000000000000 +> +[ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: +> +657466612d657375 +> +[ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : +> +ffffa54a8f63d288 +> +[ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : +> +00000000fffff31e +> +[ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : +> +ffff5ab66e5ef000 +> +root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : +> +0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 +> +[ 69.957098] Call trace: +> +[ 69.957959] refcount_warn_saturate+0xa0/0x144 +> +[ 69.958773] get_ndd+0x5c/0x80 +> +[ 69.959294] nd_region_register_namespaces+0xe4/0xe90 +> +[ 69.960253] nd_region_probe+0x100/0x290 +> +[ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 +> +[ 69.962087] really_probe+0x19c/0x3f0 +> +[ 69.962620] __driver_probe_device+0x11c/0x190 +> +[ 69.963258] driver_probe_device+0x44/0xf4 +> +[ 69.963773] __device_attach_driver+0xa4/0x140 +> +[ 69.964471] bus_for_each_drv+0x84/0xe0 +> +[ 69.965068] __device_attach+0xb0/0x1f0 +> +[ 
69.966101] device_initial_probe+0x20/0x30 +> +[ 69.967142] bus_probe_device+0xa4/0xb0 +> +[ 69.968104] device_add+0x3e8/0x910 +> +[ 69.969111] nd_async_device_register+0x24/0x74 +> +[ 69.969928] async_run_entry_fn+0x40/0x150 +> +[ 69.970725] process_one_work+0x1dc/0x450 +> +[ 69.971796] worker_thread+0x154/0x450 +> +[ 69.972700] kthread+0x118/0x120 +> +[ 69.974141] ret_from_fork+0x10/0x20 +> +[ 69.975141] ---[ end trace 0000000000000000 ]--- +> +[ 70.117887] Into nd_namespace_pmem_set_resource() + +On Mon, 15 Aug 2022 15:55:15 -0700 +Dan Williams wrote: + +> +Jonathan Cameron wrote: +> +> On Fri, 12 Aug 2022 16:44:03 +0100 +> +> Jonathan Cameron wrote: +> +> +> +> > On Thu, 11 Aug 2022 18:08:57 +0100 +> +> > Jonathan Cameron via wrote: +> +> > +> +> > > On Tue, 9 Aug 2022 17:08:25 +0100 +> +> > > Jonathan Cameron wrote: +> +> > > +> +> > > > On Tue, 9 Aug 2022 21:07:06 +0800 +> +> > > > Bobo WL wrote: +> +> > > > +> +> > > > > Hi Jonathan +> +> > > > > +> +> > > > > Thanks for your reply! +> +> > > > > +> +> > > > > On Mon, Aug 8, 2022 at 8:37 PM Jonathan Cameron +> +> > > > > wrote: +> +> > > > > > +> +> > > > > > Probably not related to your problem, but there is a disconnect +> +> > > > > > in QEMU / +> +> > > > > > kernel assumptionsaround the presence of an HDM decoder when a HB +> +> > > > > > only +> +> > > > > > has a single root port. Spec allows it to be provided or not as +> +> > > > > > an implementation choice. +> +> > > > > > Kernel assumes it isn't provide. Qemu assumes it is. +> +> > > > > > +> +> > > > > > The temporary solution is to throw in a second root port on the +> +> > > > > > HB and not +> +> > > > > > connect anything to it. Longer term I may special case this so +> +> > > > > > that the particular +> +> > > > > > decoder defaults to pass through settings in QEMU if there is +> +> > > > > > only one root port. +> +> > > > > > +> +> > > > > +> +> > > > > You are right! After adding an extra HB in qemu, I can create a x1 +> +> > > > > region successfully. +> +> > > > > But have some errors in Nvdimm: +> +> > > > > +> +> > > > > [ 74.925838] Unknown online node for memory at 0x10000000000, +> +> > > > > assuming node 0 +> +> > > > > [ 74.925846] Unknown target node for memory at 0x10000000000, +> +> > > > > assuming node 0 +> +> > > > > [ 74.927470] nd_region region0: nmem0: is disabled, failing probe +> +> > > > > +> +> > > > +> +> > > > Ah. I've seen this one, but not chased it down yet. Was on my todo +> +> > > > list to chase +> +> > > > down. Once I reach this state I can verify the HDM Decode is correct +> +> > > > which is what +> +> > > > I've been using to test (Which wasn't true until earlier this week). +> +> > > > I'm currently testing via devmem, more for historical reasons than +> +> > > > because it makes +> +> > > > that much sense anymore. +> +> > > +> +> > > *embarassed cough*. We haven't fully hooked the LSA up in qemu yet. +> +> > > I'd forgotten that was still on the todo list. I don't think it will +> +> > > be particularly hard to do and will take a look in next few days. +> +> > > +> +> > > Very very indirectly this error is causing a driver probe fail that +> +> > > means that +> +> > > we hit a code path that has a rather odd looking check on NDD_LABELING. +> +> > > Should not have gotten near that path though - hence the problem is +> +> > > actually +> +> > > when we call cxl_pmem_get_config_data() and it returns an error because +> +> > > we haven't fully connected up the command in QEMU. +> +> > +> +> > So a least one bug in QEMU. 
We were not supporting variable length +> +> > payloads on mailbox +> +> > inputs (but were on outputs). That hasn't mattered until we get to LSA +> +> > writes. +> +> > We just need to relax condition on the supplied length. +> +> > +> +> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +> +> > index c352a935c4..fdda9529fe 100644 +> +> > --- a/hw/cxl/cxl-mailbox-utils.c +> +> > +++ b/hw/cxl/cxl-mailbox-utils.c +> +> > @@ -510,7 +510,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +> +> > cxl_cmd = &cxl_cmd_set[set][cmd]; +> +> > h = cxl_cmd->handler; +> +> > if (h) { +> +> > - if (len == cxl_cmd->in) { +> +> > + if (len == cxl_cmd->in || !cxl_cmd->in) { +> +> Fix is wrong as we use ~0 as the placeholder for variable payload, not 0. +> +> +> +> With that fixed we hit new fun paths - after some errors we get the +> +> worrying - not totally sure but looks like a failure on an error cleanup. +> +> I'll chase down the error source, but even then this is probably +> +> triggerable by +> +> hardware problem or similar. Some bonus prints in here from me chasing +> +> error paths, but it's otherwise just cxl/next + the fix I posted earlier +> +> today. +> +> +One of the scenarios that I cannot rule out is nvdimm_probe() racing +> +nd_region_probe(), but given all the work it takes to create a region I +> +suspect all the nvdimm_probe() work to have completed... +> +> +It is at least one potentially wrong hypothesis that needs to be chased +> +down. +Maybe there should be a special award for the non-intuitive +ndctl create-namespace command (modifies existing namespace and might create +a different empty one...) I'm sure there is some interesting history behind +that one :) + +Upshot is I just threw a filesystem on fsdax and wrote some text files on it +to allow easy grepping. The right data ends up in the memory and a plausible +namespace description is stored in the LSA. + +So to some degree at least it's 'working' on an 8 way direct connected +set of emulated devices. + +One snag is that serial number support isn't yet upstream in QEMU. +(I have had it in my tree for a while but not posted it yet because of + QEMU feature freeze) +https://gitlab.com/jic23/qemu/-/commit/144c783ea8a5fbe169f46ea1ba92940157f42733 +That's needed for meaningful cookie generation. Otherwise you can build the +namespace once, but it won't work on next probe as the cookie is 0 and you +hit some error paths. + +Maybe sensible to add a sanity check and fail namespace creation if +cookie is 0? (Silly side question, but is there a theoretical risk of +a serial number / other data combination leading to a fletcher64() +checksum that happens to be 0 - that would give a very odd bug report!) + +So to make it work the following is needed: + +1) The kernel fix for mailbox buffer overflow. +2) Qemu fix for size of arguements for get_lsa +3) Qemu fix to allow variable size input arguements (for set_lsa) +4) Serial number patch above + command lines to qemu to set appropriate + serial numbers. + +I'll send out the QEMU fixes shortly and post the Serial number patch, +though that almost certainly won't go in until next QEMU development +cycle starts in a few weeks. + +Next up, run through same tests on some other topologies. + +Jonathan + +> +> +> +> +> [ 69.919877] nd_bus ndbus0: START: nd_region.probe(region0) +> +> [ 69.920108] nd_region_probe +> +> [ 69.920623] ------------[ cut here ]------------ +> +> [ 69.920675] refcount_t: addition on 0; use-after-free. 
+> +> [ 69.921314] WARNING: CPU: 3 PID: 710 at lib/refcount.c:25 +> +> refcount_warn_saturate+0xa0/0x144 +> +> [ 69.926949] Modules linked in: cxl_pmem cxl_mem cxl_pci cxl_port +> +> cxl_acpi cxl_core +> +> [ 69.928830] CPU: 3 PID: 710 Comm: kworker/u8:9 Not tainted 5.19.0-rc3+ +> +> #399 +> +> [ 69.930596] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 +> +> 02/06/2015 +> +> [ 69.931482] Workqueue: events_unbound async_run_entry_fn +> +> [ 69.932403] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS +> +> BTYPE=--) +> +> [ 69.934023] pc : refcount_warn_saturate+0xa0/0x144 +> +> [ 69.935161] lr : refcount_warn_saturate+0xa0/0x144 +> +> [ 69.936541] sp : ffff80000890b960 +> +> [ 69.937921] x29: ffff80000890b960 x28: 0000000000000000 x27: +> +> 0000000000000000 +> +> [ 69.940917] x26: ffffa54a90d5cb10 x25: ffffa54a90809e98 x24: +> +> 0000000000000000 +> +> [ 69.942537] x23: ffffa54a91a3d8d8 x22: ffff0000c5254800 x21: +> +> ffff0000c5254800 +> +> [ 69.944013] x20: ffff0000ce924180 x19: ffff0000c5254800 x18: +> +> ffffffffffffffff +> +> [ 69.946100] x17: ffff5ab66e5ef000 x16: ffff80000801c000 x15: +> +> 0000000000000000 +> +> [ 69.947585] x14: 0000000000000001 x13: 0a2e656572662d72 x12: +> +> 657466612d657375 +> +> [ 69.948670] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : +> +> ffffa54a8f63d288 +> +> [ 69.950679] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : +> +> 00000000fffff31e +> +> [ 69.952113] x5 : ffff0000ff61ba08 x4 : 00000000fffff31e x3 : +> +> ffff5ab66e5ef000 +> +> root@debian:/sys/bus/cxl/devices/decoder0.0/region0# [ 69.954752] x2 : +> +> 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c512e740 +> +> [ 69.957098] Call trace: +> +> [ 69.957959] refcount_warn_saturate+0xa0/0x144 +> +> [ 69.958773] get_ndd+0x5c/0x80 +> +> [ 69.959294] nd_region_register_namespaces+0xe4/0xe90 +> +> [ 69.960253] nd_region_probe+0x100/0x290 +> +> [ 69.960796] nvdimm_bus_probe+0xf4/0x1c0 +> +> [ 69.962087] really_probe+0x19c/0x3f0 +> +> [ 69.962620] __driver_probe_device+0x11c/0x190 +> +> [ 69.963258] driver_probe_device+0x44/0xf4 +> +> [ 69.963773] __device_attach_driver+0xa4/0x140 +> +> [ 69.964471] bus_for_each_drv+0x84/0xe0 +> +> [ 69.965068] __device_attach+0xb0/0x1f0 +> +> [ 69.966101] device_initial_probe+0x20/0x30 +> +> [ 69.967142] bus_probe_device+0xa4/0xb0 +> +> [ 69.968104] device_add+0x3e8/0x910 +> +> [ 69.969111] nd_async_device_register+0x24/0x74 +> +> [ 69.969928] async_run_entry_fn+0x40/0x150 +> +> [ 69.970725] process_one_work+0x1dc/0x450 +> +> [ 69.971796] worker_thread+0x154/0x450 +> +> [ 69.972700] kthread+0x118/0x120 +> +> [ 69.974141] ret_from_fork+0x10/0x20 +> +> [ 69.975141] ---[ end trace 0000000000000000 ]--- +> +> [ 70.117887] Into nd_namespace_pmem_set_resource() + +Bobo WL wrote: +> +Hi list +> +> +I want to test cxl functions in arm64, and found some problems I can't +> +figure out. +> +> +My test environment: +> +> +1. build latest bios from +https://github.com/tianocore/edk2.git +master +> +branch(cc2db6ebfb6d9d85ba4c7b35fba1fa37fffc0bc2) +> +2. build latest qemu-system-aarch64 from git://git.qemu.org/qemu.git +> +master branch(846dcf0ba4eff824c295f06550b8673ff3f31314). With cxl arm +> +support patch: +> +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +> +3. build Linux kernel from +> +https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git +preview +> +branch(65fc1c3d26b96002a5aa1f4012fae4dc98fd5683) +> +4. 
build latest ndctl tools from +https://github.com/pmem/ndctl +> +create_region branch(8558b394e449779e3a4f3ae90fae77ede0bca159) +> +> +And my qemu test commands: +> +sudo $QEMU_BIN -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 \ +> +-cpu max -smp 8 -nographic -no-reboot \ +> +-kernel $KERNEL -bios $BIOS_BIN \ +> +-drive if=none,file=$ROOTFS,format=qcow2,id=hd \ +> +-device virtio-blk-pci,drive=hd -append 'root=/dev/vda1 +> +nokaslr dyndbg="module cxl* +p"' \ +> +-object memory-backend-ram,size=4G,id=mem0 \ +> +-numa node,nodeid=0,cpus=0-7,memdev=mem0 \ +> +-net nic -net user,hostfwd=tcp::2222-:22 -enable-kvm \ +> +-object +> +memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/cxltest3.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa0.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa1.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/lsa2.raw,size=256M +> +\ +> +-object +> +memory-backend-file,id=cxl-lsa3,share=on,mem-path=/tmp/lsa3.raw,size=256M +> +\ +> +-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ +> +-device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \ +> +-device cxl-upstream,bus=root_port0,id=us0 \ +> +-device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ +> +-device +> +cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ +> +-device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ +> +-device +> +cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ +> +-device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ +> +-device +> +cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ +> +-device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ +> +-device +> +cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ +> +-M +> +cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k +> +> +And I have got two problems. +> +1. When I want to create x1 region with command: "cxl create-region -d +> +decoder0.0 -w 1 -g 4096 mem0", kernel crashed with null pointer +> +reference. 
Crash log: +> +> +[ 534.697324] cxl_region region0: config state: 0 +> +[ 534.697346] cxl_region region0: probe: -6 +> +[ 534.697368] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 534.699115] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 534.699149] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 534.699167] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 534.699176] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 534.699182] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 534.699189] cxl region0: 0000:0d:00.0:port2 iw: 1 ig: 256 +> +[ 534.699193] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 534.699405] Unable to handle kernel NULL pointer dereference at +> +virtual address 0000000000000000 +> +[ 534.701474] Mem abort info: +> +[ 534.701994] ESR = 0x0000000086000004 +> +[ 534.702653] EC = 0x21: IABT (current EL), IL = 32 bits +> +[ 534.703616] SET = 0, FnV = 0 +> +[ 534.704174] EA = 0, S1PTW = 0 +> +[ 534.704803] FSC = 0x04: level 0 translation fault +> +[ 534.705694] user pgtable: 4k pages, 48-bit VAs, pgdp=000000010144a000 +> +[ 534.706875] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 +> +[ 534.709855] Internal error: Oops: 86000004 [#1] PREEMPT SMP +> +[ 534.710301] Modules linked in: +> +[ 534.710546] CPU: 7 PID: 331 Comm: cxl Not tainted +> +5.19.0-rc3-00064-g65fc1c3d26b9-dirty #11 +> +[ 534.715393] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 +> +[ 534.717179] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +> +[ 534.719190] pc : 0x0 +> +[ 534.719928] lr : commit_store+0x118/0x2cc +> +[ 534.721007] sp : ffff80000aec3c30 +> +[ 534.721793] x29: ffff80000aec3c30 x28: ffff0000da62e740 x27: +> +ffff0000c0c06b30 +> +[ 534.723875] x26: 0000000000000000 x25: ffff0000c0a2a400 x24: +> +ffff0000c0a29400 +> +[ 534.725440] x23: 0000000000000003 x22: 0000000000000000 x21: +> +ffff0000c0c06800 +> +[ 534.727312] x20: 0000000000000000 x19: ffff0000c1559800 x18: +> +0000000000000000 +> +[ 534.729138] x17: 0000000000000000 x16: 0000000000000000 x15: +> +0000ffffd41fe838 +> +[ 534.731046] x14: 0000000000000000 x13: 0000000000000000 x12: +> +0000000000000000 +> +[ 534.732402] x11: 0000000000000000 x10: 0000000000000000 x9 : +> +0000000000000000 +> +[ 534.734432] x8 : 0000000000000000 x7 : 0000000000000000 x6 : +> +ffff0000c0906e80 +> +[ 534.735921] x5 : 0000000000000000 x4 : 0000000000000000 x3 : +> +ffff80000aec3bf0 +> +[ 534.737437] x2 : 0000000000000000 x1 : 0000000000000000 x0 : +> +ffff0000c155a000 +> +[ 534.738878] Call trace: +> +[ 534.739368] 0x0 +> +[ 534.739713] dev_attr_store+0x1c/0x30 +> +[ 534.740186] sysfs_kf_write+0x48/0x58 +> +[ 534.740961] kernfs_fop_write_iter+0x128/0x184 +> +[ 534.741872] new_sync_write+0xdc/0x158 +> +[ 534.742706] vfs_write+0x1ac/0x2a8 +> +[ 534.743440] ksys_write+0x68/0xf0 +> +[ 534.744328] __arm64_sys_write+0x1c/0x28 +> +[ 534.745180] invoke_syscall+0x44/0xf0 +> +[ 534.745989] el0_svc_common+0x4c/0xfc +> +[ 534.746661] do_el0_svc+0x60/0xa8 +> +[ 534.747378] el0_svc+0x2c/0x78 +> +[ 534.748066] el0t_64_sync_handler+0xb8/0x12c +> +[ 534.748919] el0t_64_sync+0x18c/0x190 +> +[ 534.749629] Code: bad PC value +> +[ 534.750169] ---[ end trace 0000000000000000 ]--- +What was the top kernel commit when you ran this test? 
What is the line +number of "commit_store+0x118"? + +> +2. When I want to create x4 region with command: "cxl create-region -d +> +decoder0.0 -w 4 -g 4096 -m mem0 mem1 mem2 mem3". I got below errors: +> +> +cxl region: create_region: region0: failed to set target3 to mem3 +> +cxl region: cmd_create_region: created 0 regions +> +> +And kernel log as below: +> +[ 60.536663] cxl_region region0: config state: 0 +> +[ 60.536675] cxl_region region0: probe: -6 +> +[ 60.536696] cxl_acpi ACPI0017:00: decoder0.0: created region0 +> +[ 60.538251] cxl region0: mem0:endpoint3 decoder3.0 add: +> +mem0:decoder3.0 @ 0 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538278] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem0:decoder3.0 @ 0 next: mem0 nr_eps: 1 nr_targets: 1 +> +[ 60.538295] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem0:decoder3.0 @ 0 next: 0000:0d:00.0 nr_eps: 1 nr_targets: 1 +> +[ 60.538647] cxl region0: mem1:endpoint4 decoder4.0 add: +> +mem1:decoder4.0 @ 1 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.538663] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem1:decoder4.0 @ 1 next: mem1 nr_eps: 2 nr_targets: 2 +> +[ 60.538675] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem1:decoder4.0 @ 1 next: 0000:0d:00.0 nr_eps: 2 nr_targets: 1 +> +[ 60.539311] cxl region0: mem2:endpoint5 decoder5.0 add: +> +mem2:decoder5.0 @ 2 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539332] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem2:decoder5.0 @ 2 next: mem2 nr_eps: 3 nr_targets: 3 +> +[ 60.539343] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem2:decoder5.0 @ 2 next: 0000:0d:00.0 nr_eps: 3 nr_targets: 1 +> +[ 60.539711] cxl region0: mem3:endpoint6 decoder6.0 add: +> +mem3:decoder6.0 @ 3 next: none nr_eps: 1 nr_targets: 1 +> +[ 60.539723] cxl region0: 0000:0d:00.0:port2 decoder2.0 add: +> +mem3:decoder6.0 @ 3 next: mem3 nr_eps: 4 nr_targets: 4 +> +[ 60.539735] cxl region0: ACPI0016:00:port1 decoder1.0 add: +> +mem3:decoder6.0 @ 3 next: 0000:0d:00.0 nr_eps: 4 nr_targets: 1 +> +[ 60.539742] cxl region0: ACPI0016:00:port1 iw: 1 ig: 256 +> +[ 60.539747] cxl region0: ACPI0016:00:port1 target[0] = 0000:0c:00.0 +> +for mem0:decoder3.0 @ 0 +> +[ 60.539754] cxl region0: 0000:0d:00.0:port2 iw: 4 ig: 512 +> +[ 60.539758] cxl region0: 0000:0d:00.0:port2 target[0] = +> +0000:0e:00.0 for mem0:decoder3.0 @ 0 +> +[ 60.539764] cxl region0: ACPI0016:00:port1: cannot host mem1:decoder4.0 at +> +1 +> +> +I have tried to write sysfs node manually, got same errors. +> +> +Hope I can get some helps here. +What is the output of: + + cxl list -MDTu -d decoder0.0 + +...? It might be the case that mem1 cannot be mapped by decoder0.0, or +at least not in the specified order, or that validation check is broken. + +Hi Dan, + +Thanks for your reply! + +On Mon, Aug 8, 2022 at 11:58 PM Dan Williams wrote: +> +> +What is the output of: +> +> +cxl list -MDTu -d decoder0.0 +> +> +...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +at least not in the specified order, or that validation check is broken. 
+Command "cxl list -MDTu -d decoder0.0" output: + +[ + { + "memdevs":[ + { + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:11:00.0" + }, + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:10:00.0" + }, + { + "memdev":"mem0", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:0f:00.0" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":0, + "serial":"0", + "host":"0000:12:00.0" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder0.0", + "resource":"0x10000000000", + "size":"4.00 GiB (4.29 GB)", + "pmem_capable":true, + "volatile_capable":true, + "accelmem_capable":true, + "nr_targets":1, + "targets":[ + { + "target":"ACPI0016:01", + "alias":"pci0000:0c", + "position":0, + "id":"0xc" + } + ] + } + ] + } +] + +Bobo WL wrote: +> +Hi Dan, +> +> +Thanks for your reply! +> +> +On Mon, Aug 8, 2022 at 11:58 PM Dan Williams wrote: +> +> +> +> What is the output of: +> +> +> +> cxl list -MDTu -d decoder0.0 +> +> +> +> ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> at least not in the specified order, or that validation check is broken. +> +> +Command "cxl list -MDTu -d decoder0.0" output: +Thanks for this, I think I know the problem, but will try some +experiments with cxl_test first. + +Did the commit_store() crash stop reproducing with latest cxl/preview +branch? + +On Tue, Aug 9, 2022 at 11:17 PM Dan Williams wrote: +> +> +Bobo WL wrote: +> +> Hi Dan, +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> wrote: +> +> > +> +> > What is the output of: +> +> > +> +> > cxl list -MDTu -d decoder0.0 +> +> > +> +> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > at least not in the specified order, or that validation check is broken. +> +> +> +> Command "cxl list -MDTu -d decoder0.0" output: +> +> +Thanks for this, I think I know the problem, but will try some +> +experiments with cxl_test first. +> +> +Did the commit_store() crash stop reproducing with latest cxl/preview +> +branch? +No, still hitting this bug if don't add extra HB device in qemu + +Dan Williams wrote: +> +Bobo WL wrote: +> +> Hi Dan, +> +> +> +> Thanks for your reply! +> +> +> +> On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> wrote: +> +> > +> +> > What is the output of: +> +> > +> +> > cxl list -MDTu -d decoder0.0 +> +> > +> +> > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > at least not in the specified order, or that validation check is broken. +> +> +> +> Command "cxl list -MDTu -d decoder0.0" output: +> +> +Thanks for this, I think I know the problem, but will try some +> +experiments with cxl_test first. +Hmm, so my cxl_test experiment unfortunately passed so I'm not +reproducing the failure mode. 
This is the result of creating x4 region +with devices directly attached to a single host-bridge: + +# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30)) +{ + "region":"region8", + "resource":"0xf1f0000000", + "size":"1024.00 MiB (1073.74 MB)", + "interleave_ways":4, + "interleave_granularity":256, + "decode_state":"commit", + "mappings":[ + { + "position":3, + "memdev":"mem11", + "decoder":"decoder21.0" + }, + { + "position":2, + "memdev":"mem9", + "decoder":"decoder19.0" + }, + { + "position":1, + "memdev":"mem10", + "decoder":"decoder20.0" + }, + { + "position":0, + "memdev":"mem12", + "decoder":"decoder22.0" + } + ] +} +cxl region: cmd_create_region: created 1 region + +> +Did the commit_store() crash stop reproducing with latest cxl/preview +> +branch? +I missed the answer to this question. + +All of these changes are now in Linus' tree perhaps give that a try and +post the debug log again? + +On Thu, 11 Aug 2022 17:46:55 -0700 +Dan Williams wrote: + +> +Dan Williams wrote: +> +> Bobo WL wrote: +> +> > Hi Dan, +> +> > +> +> > Thanks for your reply! +> +> > +> +> > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> > wrote: +> +> > > +> +> > > What is the output of: +> +> > > +> +> > > cxl list -MDTu -d decoder0.0 +> +> > > +> +> > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > > at least not in the specified order, or that validation check is +> +> > > broken. +> +> > +> +> > Command "cxl list -MDTu -d decoder0.0" output: +> +> +> +> Thanks for this, I think I know the problem, but will try some +> +> experiments with cxl_test first. +> +> +Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +reproducing the failure mode. This is the result of creating x4 region +> +with devices directly attached to a single host-bridge: +> +> +# cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s $((1<<30)) +> +{ +> +"region":"region8", +> +"resource":"0xf1f0000000", +> +"size":"1024.00 MiB (1073.74 MB)", +> +"interleave_ways":4, +> +"interleave_granularity":256, +> +"decode_state":"commit", +> +"mappings":[ +> +{ +> +"position":3, +> +"memdev":"mem11", +> +"decoder":"decoder21.0" +> +}, +> +{ +> +"position":2, +> +"memdev":"mem9", +> +"decoder":"decoder19.0" +> +}, +> +{ +> +"position":1, +> +"memdev":"mem10", +> +"decoder":"decoder20.0" +> +}, +> +{ +> +"position":0, +> +"memdev":"mem12", +> +"decoder":"decoder22.0" +> +} +> +] +> +} +> +cxl region: cmd_create_region: created 1 region +> +> +> Did the commit_store() crash stop reproducing with latest cxl/preview +> +> branch? +> +> +I missed the answer to this question. +> +> +All of these changes are now in Linus' tree perhaps give that a try and +> +post the debug log again? +Hi Dan, + +I've moved onto looking at this one. +1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy that +up +at some stage), 1 switch, 4 downstream switch ports each with a type 3 + +I'm not getting a crash, but can't successfully setup a region. +Upon adding the final target +It's failing in check_last_peer() as pos < distance. +Seems distance is 4 which makes me think it's using the wrong level of the +heirarchy for +some reason or that distance check is wrong. +Wasn't a good idea to just skip that step though as it goes boom - though +stack trace is not useful. 
+ +Jonathan + +On Wed, 17 Aug 2022 17:16:19 +0100 +Jonathan Cameron wrote: + +> +On Thu, 11 Aug 2022 17:46:55 -0700 +> +Dan Williams wrote: +> +> +> Dan Williams wrote: +> +> > Bobo WL wrote: +> +> > > Hi Dan, +> +> > > +> +> > > Thanks for your reply! +> +> > > +> +> > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> > > wrote: +> +> > > > +> +> > > > What is the output of: +> +> > > > +> +> > > > cxl list -MDTu -d decoder0.0 +> +> > > > +> +> > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, or +> +> > > > at least not in the specified order, or that validation check is +> +> > > > broken. +> +> > > +> +> > > Command "cxl list -MDTu -d decoder0.0" output: +> +> > +> +> > Thanks for this, I think I know the problem, but will try some +> +> > experiments with cxl_test first. +> +> +> +> Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +> reproducing the failure mode. This is the result of creating x4 region +> +> with devices directly attached to a single host-bridge: +> +> +> +> # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s +> +> $((1<<30)) +> +> { +> +> "region":"region8", +> +> "resource":"0xf1f0000000", +> +> "size":"1024.00 MiB (1073.74 MB)", +> +> "interleave_ways":4, +> +> "interleave_granularity":256, +> +> "decode_state":"commit", +> +> "mappings":[ +> +> { +> +> "position":3, +> +> "memdev":"mem11", +> +> "decoder":"decoder21.0" +> +> }, +> +> { +> +> "position":2, +> +> "memdev":"mem9", +> +> "decoder":"decoder19.0" +> +> }, +> +> { +> +> "position":1, +> +> "memdev":"mem10", +> +> "decoder":"decoder20.0" +> +> }, +> +> { +> +> "position":0, +> +> "memdev":"mem12", +> +> "decoder":"decoder22.0" +> +> } +> +> ] +> +> } +> +> cxl region: cmd_create_region: created 1 region +> +> +> +> > Did the commit_store() crash stop reproducing with latest cxl/preview +> +> > branch? +> +> +> +> I missed the answer to this question. +> +> +> +> All of these changes are now in Linus' tree perhaps give that a try and +> +> post the debug log again? +> +> +Hi Dan, +> +> +I've moved onto looking at this one. +> +1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy +> +that up +> +at some stage), 1 switch, 4 downstream switch ports each with a type 3 +> +> +I'm not getting a crash, but can't successfully setup a region. +> +Upon adding the final target +> +It's failing in check_last_peer() as pos < distance. +> +Seems distance is 4 which makes me think it's using the wrong level of the +> +heirarchy for +> +some reason or that distance check is wrong. +> +Wasn't a good idea to just skip that step though as it goes boom - though +> +stack trace is not useful. +Turns out really weird corruption happens if you accidentally back two type3 +devices +with the same memory device. Who would have thought it :) + +That aside ignoring the check_last_peer() failure seems to make everything work +for this +topology. I'm not seeing the crash, so my guess is we fixed it somewhere along +the way. + +Now for the fun one. I've replicated the crash if we have + +1HB 1*RP 1SW, 4SW-DSP, 4Type3 + +Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be +programmed +but the null pointer dereference isn't related to that. + +The bug is straight forward. Not all decoders have commit callbacks... Will +send out +a possible fix shortly. 
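+
+A minimal sketch of the kind of guard that avoids that NULL dereference,
+assuming the commit op is a nullable function pointer; the names below
+are illustrative rather than the exact upstream cxl code:
+
+static int commit_decoder(struct cxl_decoder *cxld)
+{
+	/* Not every decoder implements commit; skip rather than call NULL. */
+	if (!cxld->commit)
+		return 0;
+
+	return cxld->commit(cxld);
+}
+
+Callers then treat a decoder without a commit op as trivially committed
+instead of jumping through a NULL pointer.
+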
+ +Jonathan + + + +> +> +Jonathan +> +> +> +> +> +> + +On Thu, 18 Aug 2022 17:37:40 +0100 +Jonathan Cameron via wrote: + +> +On Wed, 17 Aug 2022 17:16:19 +0100 +> +Jonathan Cameron wrote: +> +> +> On Thu, 11 Aug 2022 17:46:55 -0700 +> +> Dan Williams wrote: +> +> +> +> > Dan Williams wrote: +> +> > > Bobo WL wrote: +> +> > > > Hi Dan, +> +> > > > +> +> > > > Thanks for your reply! +> +> > > > +> +> > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> > > > wrote: +> +> > > > > +> +> > > > > What is the output of: +> +> > > > > +> +> > > > > cxl list -MDTu -d decoder0.0 +> +> > > > > +> +> > > > > ...? It might be the case that mem1 cannot be mapped by decoder0.0, +> +> > > > > or +> +> > > > > at least not in the specified order, or that validation check is +> +> > > > > broken. +> +> > > > +> +> > > > Command "cxl list -MDTu -d decoder0.0" output: +> +> > > +> +> > > Thanks for this, I think I know the problem, but will try some +> +> > > experiments with cxl_test first. +> +> > +> +> > Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +> > reproducing the failure mode. This is the result of creating x4 region +> +> > with devices directly attached to a single host-bridge: +> +> > +> +> > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s +> +> > $((1<<30)) +> +> > { +> +> > "region":"region8", +> +> > "resource":"0xf1f0000000", +> +> > "size":"1024.00 MiB (1073.74 MB)", +> +> > "interleave_ways":4, +> +> > "interleave_granularity":256, +> +> > "decode_state":"commit", +> +> > "mappings":[ +> +> > { +> +> > "position":3, +> +> > "memdev":"mem11", +> +> > "decoder":"decoder21.0" +> +> > }, +> +> > { +> +> > "position":2, +> +> > "memdev":"mem9", +> +> > "decoder":"decoder19.0" +> +> > }, +> +> > { +> +> > "position":1, +> +> > "memdev":"mem10", +> +> > "decoder":"decoder20.0" +> +> > }, +> +> > { +> +> > "position":0, +> +> > "memdev":"mem12", +> +> > "decoder":"decoder22.0" +> +> > } +> +> > ] +> +> > } +> +> > cxl region: cmd_create_region: created 1 region +> +> > +> +> > > Did the commit_store() crash stop reproducing with latest cxl/preview +> +> > > branch? +> +> > +> +> > I missed the answer to this question. +> +> > +> +> > All of these changes are now in Linus' tree perhaps give that a try and +> +> > post the debug log again? +> +> +> +> Hi Dan, +> +> +> +> I've moved onto looking at this one. +> +> 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy +> +> that up +> +> at some stage), 1 switch, 4 downstream switch ports each with a type 3 +> +> +> +> I'm not getting a crash, but can't successfully setup a region. +> +> Upon adding the final target +> +> It's failing in check_last_peer() as pos < distance. +> +> Seems distance is 4 which makes me think it's using the wrong level of the +> +> heirarchy for +> +> some reason or that distance check is wrong. +> +> Wasn't a good idea to just skip that step though as it goes boom - though +> +> stack trace is not useful. +> +> +Turns out really weird corruption happens if you accidentally back two type3 +> +devices +> +with the same memory device. Who would have thought it :) +> +> +That aside ignoring the check_last_peer() failure seems to make everything +> +work for this +> +topology. I'm not seeing the crash, so my guess is we fixed it somewhere +> +along the way. +> +> +Now for the fun one. 
I've replicated the crash if we have +> +> +1HB 1*RP 1SW, 4SW-DSP, 4Type3 +> +> +Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be +> +programmed +> +but the null pointer dereference isn't related to that. +> +> +The bug is straight forward. Not all decoders have commit callbacks... Will +> +send out +> +a possible fix shortly. +> +For completeness I'm carrying this hack because I haven't gotten my head +around the right fix for check_last_peer() failing on this test topology. + +diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c +index c49d9a5f1091..275e143bd748 100644 +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, + rc = check_last_peer(cxled, ep, cxl_rr, + distance); + if (rc) +- return rc; ++ // return rc; + goto out_target_set; + } + goto add_target; +-- + +I might find more bugs with more testing, but this is all the ones I've +seen so far + in Bobo's reports. Qemu fixes are now in upstream so +will be there in the release. + +As a reminder, testing on QEMU has a few corners... + +Need a patch to add serial number ECAP support. It is on list for revew, +but will have wait for after QEMU 7.1 release (which may be next week) + +QEMU still assumes HDM decoder on the host bridge will be programmed. +So if you want anything to work there should be at least +2 RP below the HB (no need to plug anything in to one of them). + +I don't want to add a commandline parameter to hide the decoder in QEMU +and detecting there is only one RP would require moving a bunch of static +stuff into runtime code (I think). + +I still think we should make the kernel check to see if there is a decoder, +but if not I might see how bad a hack it is to have QEMU ignore that decoder +if not committed in this one special case (HB HDM decoder with only one place +it can send stuff). Obviously that would be a break from specification +so less than idea! + +Thanks, + +Jonathan + +On Fri, 19 Aug 2022 09:46:55 +0100 +Jonathan Cameron wrote: + +> +On Thu, 18 Aug 2022 17:37:40 +0100 +> +Jonathan Cameron via wrote: +> +> +> On Wed, 17 Aug 2022 17:16:19 +0100 +> +> Jonathan Cameron wrote: +> +> +> +> > On Thu, 11 Aug 2022 17:46:55 -0700 +> +> > Dan Williams wrote: +> +> > +> +> > > Dan Williams wrote: +> +> > > > Bobo WL wrote: +> +> > > > > Hi Dan, +> +> > > > > +> +> > > > > Thanks for your reply! +> +> > > > > +> +> > > > > On Mon, Aug 8, 2022 at 11:58 PM Dan Williams +> +> > > > > wrote: +> +> > > > > > +> +> > > > > > What is the output of: +> +> > > > > > +> +> > > > > > cxl list -MDTu -d decoder0.0 +> +> > > > > > +> +> > > > > > ...? It might be the case that mem1 cannot be mapped by +> +> > > > > > decoder0.0, or +> +> > > > > > at least not in the specified order, or that validation check is +> +> > > > > > broken. +> +> > > > > +> +> > > > > Command "cxl list -MDTu -d decoder0.0" output: +> +> > > > +> +> > > > Thanks for this, I think I know the problem, but will try some +> +> > > > experiments with cxl_test first. +> +> > > +> +> > > Hmm, so my cxl_test experiment unfortunately passed so I'm not +> +> > > reproducing the failure mode. 
This is the result of creating x4 region +> +> > > with devices directly attached to a single host-bridge: +> +> > > +> +> > > # cxl create-region -d decoder3.5 -w 4 -m -g 256 mem{12,10,9,11} -s +> +> > > $((1<<30)) +> +> > > { +> +> > > "region":"region8", +> +> > > "resource":"0xf1f0000000", +> +> > > "size":"1024.00 MiB (1073.74 MB)", +> +> > > "interleave_ways":4, +> +> > > "interleave_granularity":256, +> +> > > "decode_state":"commit", +> +> > > "mappings":[ +> +> > > { +> +> > > "position":3, +> +> > > "memdev":"mem11", +> +> > > "decoder":"decoder21.0" +> +> > > }, +> +> > > { +> +> > > "position":2, +> +> > > "memdev":"mem9", +> +> > > "decoder":"decoder19.0" +> +> > > }, +> +> > > { +> +> > > "position":1, +> +> > > "memdev":"mem10", +> +> > > "decoder":"decoder20.0" +> +> > > }, +> +> > > { +> +> > > "position":0, +> +> > > "memdev":"mem12", +> +> > > "decoder":"decoder22.0" +> +> > > } +> +> > > ] +> +> > > } +> +> > > cxl region: cmd_create_region: created 1 region +> +> > > +> +> > > > Did the commit_store() crash stop reproducing with latest cxl/preview +> +> > > > branch? +> +> > > +> +> > > I missed the answer to this question. +> +> > > +> +> > > All of these changes are now in Linus' tree perhaps give that a try and +> +> > > post the debug log again? +> +> > +> +> > Hi Dan, +> +> > +> +> > I've moved onto looking at this one. +> +> > 1 HB, 2RP (to make it configure the HDM decoder in the QEMU HB, I'll tidy +> +> > that up +> +> > at some stage), 1 switch, 4 downstream switch ports each with a type 3 +> +> > +> +> > I'm not getting a crash, but can't successfully setup a region. +> +> > Upon adding the final target +> +> > It's failing in check_last_peer() as pos < distance. +> +> > Seems distance is 4 which makes me think it's using the wrong level of +> +> > the heirarchy for +> +> > some reason or that distance check is wrong. +> +> > Wasn't a good idea to just skip that step though as it goes boom - though +> +> > stack trace is not useful. +> +> +> +> Turns out really weird corruption happens if you accidentally back two +> +> type3 devices +> +> with the same memory device. Who would have thought it :) +> +> +> +> That aside ignoring the check_last_peer() failure seems to make everything +> +> work for this +> +> topology. I'm not seeing the crash, so my guess is we fixed it somewhere +> +> along the way. +> +> +> +> Now for the fun one. I've replicated the crash if we have +> +> +> +> 1HB 1*RP 1SW, 4SW-DSP, 4Type3 +> +> +> +> Now, I'd expect to see it not 'work' because the QEMU HDM decoder won't be +> +> programmed +> +> but the null pointer dereference isn't related to that. +> +> +> +> The bug is straight forward. Not all decoders have commit callbacks... +> +> Will send out +> +> a possible fix shortly. +> +> +> +For completeness I'm carrying this hack because I haven't gotten my head +> +around the right fix for check_last_peer() failing on this test topology. +> +> +diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c +> +index c49d9a5f1091..275e143bd748 100644 +> +--- a/drivers/cxl/core/region.c +> ++++ b/drivers/cxl/core/region.c +> +@@ -978,7 +978,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, +> +rc = check_last_peer(cxled, ep, cxl_rr, +> +distance); +> +if (rc) +> +- return rc; +> ++ // return rc; +> +goto out_target_set; +> +} +> +goto add_target; +I'm still carrying this hack and still haven't worked out the right fix. + +Suggestions welcome! If not I'll hopefully get some time on this +towards the end of the week. 
+ +Jonathan + diff --git a/classification_output/01/instruction/3457423 b/classification_output/01/instruction/3457423 deleted file mode 100644 index ffcf905b4..000000000 --- a/classification_output/01/instruction/3457423 +++ /dev/null @@ -1,40 +0,0 @@ -instruction: 0.778 -semantic: 0.635 -mistranslation: 0.537 -other: 0.236 - -[Qemu-devel] [BUG] Failed to compile using gcc7.1 - -Hi all, - -After upgrading gcc from 6.3.1 to 7.1.1, qemu can't be compiled with gcc. - -The error is: - ------- - CC block/blkdebug.o -block/blkdebug.c: In function 'blkdebug_refresh_filename': -block/blkdebug.c:693:31: error: '%s' directive output may be truncated -writing up to 4095 bytes into a region of size 4086 -[-Werror=format-truncation=] -"blkdebug:%s:%s", s->config_file ?: "", - ^~ -In file included from /usr/include/stdio.h:939:0, - from /home/adam/qemu/include/qemu/osdep.h:68, - from block/blkdebug.c:25: -/usr/include/bits/stdio2.h:64:10: note: '__builtin___snprintf_chk' -output 11 or more bytes (assuming 4106) into a destination of size 4096 -return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, - ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - __bos (__s), __fmt, __va_arg_pack ()); - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -cc1: all warnings being treated as errors -make: *** [/home/adam/qemu/rules.mak:69: block/blkdebug.o] Error 1 ------- - -It seems that gcc 7 is introducing more restrict check for printf. -If using clang, although there are some extra warning, it can at least -pass the compile. -Thanks, -Qu - diff --git a/classification_output/01/instruction/42226390 b/classification_output/01/instruction/42226390 new file mode 100644 index 000000000..1d455d6fa --- /dev/null +++ b/classification_output/01/instruction/42226390 @@ -0,0 +1,187 @@ +instruction: 0.925 +semantic: 0.924 +other: 0.894 +mistranslation: 0.826 + +[BUG] AArch64 boot hang with -icount and -smp >1 (iothread locking issue?) + +Hello, + +I am encountering one or more bugs when using -icount and -smp >1 that I am +attempting to sort out. My current theory is that it is an iothread locking +issue. + +I am using a command-line like the following where $kernel is a recent upstream +AArch64 Linux kernel Image (I can provide a binary if that would be helpful - +let me know how is best to post): + + qemu-system-aarch64 \ + -M virt -cpu cortex-a57 -m 1G \ + -nographic \ + -smp 2 \ + -icount 0 \ + -kernel $kernel + +For any/all of the symptoms described below, they seem to disappear when I +either remove `-icount 0` or change smp to `-smp 1`. In other words, it is the +combination of `-smp >1` and `-icount` which triggers what I'm seeing. + +I am seeing two different (but seemingly related) behaviors. The first (and +what I originally started debugging) shows up as a boot hang. When booting +using the above command after Peter's "icount: Take iothread lock when running +QEMU timers" patch [1], The kernel boots for a while and then hangs after: + +> +...snip... 
+> +[ 0.010764] Serial: AMBA PL011 UART driver +> +[ 0.016334] 9000000.pl011: ttyAMA0 at MMIO 0x9000000 (irq = 13, base_baud +> += 0) is a PL011 rev1 +> +[ 0.016907] printk: console [ttyAMA0] enabled +> +[ 0.017624] KASLR enabled +> +[ 0.031986] HugeTLB: registered 16.0 GiB page size, pre-allocated 0 pages +> +[ 0.031986] HugeTLB: 16320 KiB vmemmap can be freed for a 16.0 GiB page +> +[ 0.031986] HugeTLB: registered 512 MiB page size, pre-allocated 0 pages +> +[ 0.031986] HugeTLB: 448 KiB vmemmap can be freed for a 512 MiB page +> +[ 0.031986] HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages +> +[ 0.031986] HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page +When it hangs here, I drop into QEMU's console, attach to the gdbserver, and it +always reports that it is at address 0xffff800008dc42e8 (as shown below from an +objdump of the vmlinux). I note this is in the middle of messing with timer +system registers - which makes me suspect we're attempting to take the iothread +lock when its already held: + +> +ffff800008dc42b8 : +> +ffff800008dc42b8: d503201f nop +> +ffff800008dc42bc: d503201f nop +> +ffff800008dc42c0: d503233f paciasp +> +ffff800008dc42c4: d53be321 mrs x1, cntv_ctl_el0 +> +ffff800008dc42c8: 32000021 orr w1, w1, #0x1 +> +ffff800008dc42cc: d5033fdf isb +> +ffff800008dc42d0: d53be042 mrs x2, cntvct_el0 +> +ffff800008dc42d4: ca020043 eor x3, x2, x2 +> +ffff800008dc42d8: 8b2363e3 add x3, sp, x3 +> +ffff800008dc42dc: f940007f ldr xzr, [x3] +> +ffff800008dc42e0: 8b020000 add x0, x0, x2 +> +ffff800008dc42e4: d51be340 msr cntv_cval_el0, x0 +> +* ffff800008dc42e8: 927ef820 and x0, x1, #0xfffffffffffffffd +> +ffff800008dc42ec: d51be320 msr cntv_ctl_el0, x0 +> +ffff800008dc42f0: d5033fdf isb +> +ffff800008dc42f4: 52800000 mov w0, #0x0 +> +// #0 +> +ffff800008dc42f8: d50323bf autiasp +> +ffff800008dc42fc: d65f03c0 ret +The second behavior is that prior to Peter's "icount: Take iothread lock when +running QEMU timers" patch [1], I observe the following message (same command +as above): + +> +ERROR:../accel/tcg/tcg-accel-ops.c:79:tcg_handle_interrupt: assertion failed: +> +(qemu_mutex_iothread_locked()) +> +Aborted (core dumped) +This is the same behavior described in Gitlab issue 1130 [0] and addressed by +[1]. I bisected the appearance of this assertion, and found it was introduced +by Pavel's "replay: rewrite async event handling" commit [2]. Commits prior to +that one boot successfully (neither assertions nor hangs) with `-icount 0 -smp +2`. + +I've looked over these two commits ([1], [2]), but it is not obvious to me +how/why they might be interacting to produce the boot hangs I'm seeing and +I welcome any help investigating further. + +Thanks! + +-Aaron Lindsay + +[0] - +https://gitlab.com/qemu-project/qemu/-/issues/1130 +[1] - +https://gitlab.com/qemu-project/qemu/-/commit/c7f26ded6d5065e4116f630f6a490b55f6c5f58e +[2] - +https://gitlab.com/qemu-project/qemu/-/commit/60618e2d77691e44bb78e23b2b0cf07b5c405e56 + +On Fri, 21 Oct 2022 at 16:48, Aaron Lindsay + wrote: +> +> +Hello, +> +> +I am encountering one or more bugs when using -icount and -smp >1 that I am +> +attempting to sort out. My current theory is that it is an iothread locking +> +issue. +Weird coincidence, that is a bug that's been in the tree for months +but was only reported to me earlier this week. Try reverting +commit a82fd5a4ec24d923ff1e -- that should fix it. 
+CAFEAcA_i8x00hD-4XX18ySLNbCB6ds1-DSazVb4yDnF8skjd9A@mail.gmail.com +/">https://lore.kernel.org/qemu-devel/ +CAFEAcA_i8x00hD-4XX18ySLNbCB6ds1-DSazVb4yDnF8skjd9A@mail.gmail.com +/ +has the explanation. + +thanks +-- PMM + +On Oct 21 17:00, Peter Maydell wrote: +> +On Fri, 21 Oct 2022 at 16:48, Aaron Lindsay +> + wrote: +> +> +> +> Hello, +> +> +> +> I am encountering one or more bugs when using -icount and -smp >1 that I am +> +> attempting to sort out. My current theory is that it is an iothread locking +> +> issue. +> +> +Weird coincidence, that is a bug that's been in the tree for months +> +but was only reported to me earlier this week. Try reverting +> +commit a82fd5a4ec24d923ff1e -- that should fix it. +I can confirm that reverting a82fd5a4ec24d923ff1e fixes it for me. +Thanks for the help and fast response! + +-Aaron + diff --git a/classification_output/01/instruction/50773216 b/classification_output/01/instruction/50773216 new file mode 100644 index 000000000..d887fe7b5 --- /dev/null +++ b/classification_output/01/instruction/50773216 @@ -0,0 +1,110 @@ +instruction: 0.768 +other: 0.737 +semantic: 0.669 +mistranslation: 0.652 + +[Qemu-devel] Can I have someone's feedback on [bug 1809075] Concurrency bug on keyboard events: capslock LED messing up keycode streams causes character misses at guest kernel + +Hi everyone. +Can I please have someone's feedback on this bug? +https://bugs.launchpad.net/qemu/+bug/1809075 +Briefly, guest OS loses characters sent to it via vnc. And I spot the +bug in relation to ps2 driver. +I'm thinking of possible fixes and I might want to use a memory barrier. +But I would really like to have some suggestion from a qemu developer +first. For example, can we brutally drop capslock LED key events in ps2 +queue? +It is actually relevant to openQA, an automated QA tool for openSUSE. +And this bug blocks a few test cases for us. +Thank you in advance! + +Kind regards, +Gao Zhiyuan + +Cc'ing Marc-André & Gerd. + +On 12/19/18 10:31 AM, Gao Zhiyuan wrote: +> +Hi everyone. +> +> +Can I please have someone's feedback on this bug? +> +https://bugs.launchpad.net/qemu/+bug/1809075 +> +Briefly, guest OS loses characters sent to it via vnc. And I spot the +> +bug in relation to ps2 driver. +> +> +I'm thinking of possible fixes and I might want to use a memory barrier. +> +But I would really like to have some suggestion from a qemu developer +> +first. For example, can we brutally drop capslock LED key events in ps2 +> +queue? +> +> +It is actually relevant to openQA, an automated QA tool for openSUSE. +> +And this bug blocks a few test cases for us. +> +> +Thank you in advance! +> +> +Kind regards, +> +Gao Zhiyuan +> + +On Thu, Jan 03, 2019 at 12:05:54PM +0100, Philippe Mathieu-Daudé wrote: +> +Cc'ing Marc-André & Gerd. +> +> +On 12/19/18 10:31 AM, Gao Zhiyuan wrote: +> +> Hi everyone. +> +> +> +> Can I please have someone's feedback on this bug? +> +> +https://bugs.launchpad.net/qemu/+bug/1809075 +> +> Briefly, guest OS loses characters sent to it via vnc. And I spot the +> +> bug in relation to ps2 driver. +> +> +> +> I'm thinking of possible fixes and I might want to use a memory barrier. +> +> But I would really like to have some suggestion from a qemu developer +> +> first. For example, can we brutally drop capslock LED key events in ps2 +> +> queue? +There is no "capslock LED key event". 0xfa is KBD_REPLY_ACK, and the +device queues it in response to guest port writes. Yes, the ack can +race with actual key events. But IMO that isn't a bug in qemu. 
+ +Probably the linux kernel just throws away everything until it got the +ack for the port write, and that way the key event gets lost. On +physical hardware you will not notice because it is next to impossible +to type fast enough to hit the race window. + +So, go fix the kernel. + +Alternatively fix vncdotool to send uppercase letters properly with +shift key pressed. Then qemu wouldn't generate capslock key events +(that happens because qemu thinks guest and host capslock state is out +of sync) and the guests's capslock led update request wouldn't get into +the way. + +cheers, + Gerd + diff --git a/classification_output/01/instruction/51610399 b/classification_output/01/instruction/51610399 new file mode 100644 index 000000000..a78585284 --- /dev/null +++ b/classification_output/01/instruction/51610399 @@ -0,0 +1,308 @@ +instruction: 0.985 +other: 0.985 +semantic: 0.984 +mistranslation: 0.983 + +[BUG][powerpc] KVM Guest Boot Failure – Hangs at "Booting Linux via __start()” + +Bug Description: +Encountering a boot failure when launching a KVM guest with +qemu-system-ppc64. The guest hangs at boot, and the QEMU monitor +crashes. +Reproduction Steps: +# qemu-system-ppc64 --version +QEMU emulator version 9.2.50 (v9.2.0-2799-g0462a32b4f) +Copyright (c) 2003-2025 Fabrice Bellard and the QEMU Project developers +# /usr/bin/qemu-system-ppc64 -name avocado-vt-vm1 -machine +pseries,accel=kvm \ +-m 32768 -smp 32,sockets=1,cores=32,threads=1 -nographic \ + -device virtio-scsi-pci,id=scsi \ +-drive +file=/home/kvmci/tests/data/avocado-vt/images/rhel8.0devel-ppc64le.qcow2,if=none,id=drive0,format=qcow2 +\ +-device scsi-hd,drive=drive0,bus=scsi.0 \ + -netdev bridge,id=net0,br=virbr0 \ + -device virtio-net-pci,netdev=net0 \ + -serial pty \ + -device virtio-balloon-pci \ + -cpu host +QEMU 9.2.50 monitor - type 'help' for more information +char device redirected to /dev/pts/2 (label serial0) +(qemu) +(qemu) qemu-system-ppc64: warning: kernel_irqchip allowed but +unavailable: IRQ_XIVE capability must be present for KVM +Falling back to kernel-irqchip=off +** Qemu Hang + +(In another ssh session) +# screen /dev/pts/2 +Preparing to boot Linux version 6.10.4-200.fc40.ppc64le +(mockbuild@c23cc4e677614c34bb22d54eeea4dc1f) (gcc (GCC) 14.2.1 20240801 +(Red Hat 14.2.1-1), GNU ld version 2.41-37.fc40) #1 SMP Sun Aug 11 +15:20:17 UTC 2024 +Detected machine type: 0000000000000101 +command line: +BOOT_IMAGE=(ieee1275/disk,msdos2)/vmlinuz-6.10.4-200.fc40.ppc64le +root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root crashkernel=1024M +Max number of cores passed to firmware: 2048 (NR_CPUS = 2048) +Calling ibm,client-architecture-support... done +memory layout at init: + memory_limit : 0000000000000000 (16 MB aligned) + alloc_bottom : 0000000008200000 + alloc_top : 0000000030000000 + alloc_top_hi : 0000000800000000 + rmo_top : 0000000030000000 + ram_top : 0000000800000000 +instantiating rtas at 0x000000002fff0000... done +prom_hold_cpus: skipped +copying OF device tree... +Building dt strings... +Building dt structure... +Device tree strings 0x0000000008210000 -> 0x0000000008210bd0 +Device tree struct 0x0000000008220000 -> 0x0000000008230000 +Quiescing Open Firmware ... +Booting Linux via __start() @ 0x0000000000440000 ... 
+** Guest Console Hang + + +Git Bisect: +Performing git bisect points to the following patch: +# git bisect bad +e8291ec16da80566c121c68d9112be458954d90b is the first bad commit +commit e8291ec16da80566c121c68d9112be458954d90b (HEAD) +Author: Nicholas Piggin +Date: Thu Dec 19 13:40:31 2024 +1000 + + target/ppc: fix timebase register reset state +(H)DEC and PURR get reset before icount does, which causes them to +be +skewed and not match the init state. This can cause replay to not +match the recorded trace exactly. For DEC and HDEC this is usually +not +noticable since they tend to get programmed before affecting the + target machine. PURR has been observed to cause replay bugs when + running Linux. + + Fix this by resetting using a time of 0. + + Message-ID: <20241219034035.1826173-2-npiggin@gmail.com> + Signed-off-by: Nicholas Piggin + + hw/ppc/ppc.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + + +Reverting the patch helps boot the guest. +Thanks, +Misbah Anjum N + +Thanks for the report. + +Tricky problem. A secondary CPU is hanging before it is started by the +primary via rtas call. + +That secondary keeps calling kvm_cpu_exec(), which keeps exiting out +early with EXCP_HLT because kvm_arch_process_async_events() returns +true because that cpu has ->halted=1. That just goes around he run +loop because there is an interrupt pending (DEC). + +So it never runs. It also never releases the BQL, and another CPU, +the primary which is actually supposed to be running, is stuck in +spapr_set_all_lpcrs() in run_on_cpu() waiting for the BQL. + +This patch just exposes the bug I think, by causing the interrupt. +although I'm not quite sure why it's okay previously (-ve decrementer +values should be causing a timer exception too). The timer exception +should not be taken as an interrupt by those secondary CPUs, and it +doesn't because it is masked, until set_all_lpcrs sets an LPCR value +that enables powersave wakeup on decrementer interrupt. + +The start_powered_off sate just sets ->halted, which makes it look +like a powersaving state. Logically I think it's not the same thing +as far as spapr goes. I don't know why start_powered_off only sets +->halted, and not ->stop/stopped as well. + +Not sure how best to solve it cleanly. I'll send a revert if I can't +get something working soon. + +Thanks, +Nick + +On Tue Mar 18, 2025 at 7:09 AM AEST, misanjum wrote: +> +Bug Description: +> +Encountering a boot failure when launching a KVM guest with +> +qemu-system-ppc64. The guest hangs at boot, and the QEMU monitor +> +crashes. 
+> +> +> +Reproduction Steps: +> +# qemu-system-ppc64 --version +> +QEMU emulator version 9.2.50 (v9.2.0-2799-g0462a32b4f) +> +Copyright (c) 2003-2025 Fabrice Bellard and the QEMU Project developers +> +> +# /usr/bin/qemu-system-ppc64 -name avocado-vt-vm1 -machine +> +pseries,accel=kvm \ +> +-m 32768 -smp 32,sockets=1,cores=32,threads=1 -nographic \ +> +-device virtio-scsi-pci,id=scsi \ +> +-drive +> +file=/home/kvmci/tests/data/avocado-vt/images/rhel8.0devel-ppc64le.qcow2,if=none,id=drive0,format=qcow2 +> +> +\ +> +-device scsi-hd,drive=drive0,bus=scsi.0 \ +> +-netdev bridge,id=net0,br=virbr0 \ +> +-device virtio-net-pci,netdev=net0 \ +> +-serial pty \ +> +-device virtio-balloon-pci \ +> +-cpu host +> +QEMU 9.2.50 monitor - type 'help' for more information +> +char device redirected to /dev/pts/2 (label serial0) +> +(qemu) +> +(qemu) qemu-system-ppc64: warning: kernel_irqchip allowed but +> +unavailable: IRQ_XIVE capability must be present for KVM +> +Falling back to kernel-irqchip=off +> +** Qemu Hang +> +> +(In another ssh session) +> +# screen /dev/pts/2 +> +Preparing to boot Linux version 6.10.4-200.fc40.ppc64le +> +(mockbuild@c23cc4e677614c34bb22d54eeea4dc1f) (gcc (GCC) 14.2.1 20240801 +> +(Red Hat 14.2.1-1), GNU ld version 2.41-37.fc40) #1 SMP Sun Aug 11 +> +15:20:17 UTC 2024 +> +Detected machine type: 0000000000000101 +> +command line: +> +BOOT_IMAGE=(ieee1275/disk,msdos2)/vmlinuz-6.10.4-200.fc40.ppc64le +> +root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root crashkernel=1024M +> +Max number of cores passed to firmware: 2048 (NR_CPUS = 2048) +> +Calling ibm,client-architecture-support... done +> +memory layout at init: +> +memory_limit : 0000000000000000 (16 MB aligned) +> +alloc_bottom : 0000000008200000 +> +alloc_top : 0000000030000000 +> +alloc_top_hi : 0000000800000000 +> +rmo_top : 0000000030000000 +> +ram_top : 0000000800000000 +> +instantiating rtas at 0x000000002fff0000... done +> +prom_hold_cpus: skipped +> +copying OF device tree... +> +Building dt strings... +> +Building dt structure... +> +Device tree strings 0x0000000008210000 -> 0x0000000008210bd0 +> +Device tree struct 0x0000000008220000 -> 0x0000000008230000 +> +Quiescing Open Firmware ... +> +Booting Linux via __start() @ 0x0000000000440000 ... +> +** Guest Console Hang +> +> +> +Git Bisect: +> +Performing git bisect points to the following patch: +> +# git bisect bad +> +e8291ec16da80566c121c68d9112be458954d90b is the first bad commit +> +commit e8291ec16da80566c121c68d9112be458954d90b (HEAD) +> +Author: Nicholas Piggin +> +Date: Thu Dec 19 13:40:31 2024 +1000 +> +> +target/ppc: fix timebase register reset state +> +> +(H)DEC and PURR get reset before icount does, which causes them to +> +be +> +skewed and not match the init state. This can cause replay to not +> +match the recorded trace exactly. For DEC and HDEC this is usually +> +not +> +noticable since they tend to get programmed before affecting the +> +target machine. PURR has been observed to cause replay bugs when +> +running Linux. +> +> +Fix this by resetting using a time of 0. +> +> +Message-ID: <20241219034035.1826173-2-npiggin@gmail.com> +> +Signed-off-by: Nicholas Piggin +> +> +hw/ppc/ppc.c | 11 ++++++++--- +> +1 file changed, 8 insertions(+), 3 deletions(-) +> +> +> +Reverting the patch helps boot the guest. 
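(The bisect step quoted above can be scripted. The sketch below is not
taken from the report: the check-boot.sh helper, the timeout value and
the choice of v9.2.0 as the known-good point are all assumptions, and
only standard git-bisect commands are used.)

    # Assumed helper: check-boot.sh starts the guest with the command line
    # from the report and exits 0 once the console gets past
    # "Booting Linux via __start()", non-zero if it hangs.
    git bisect start
    git bisect bad v9.2.0-2799-g0462a32b4f   # the build reported as hanging
    git bisect good v9.2.0                   # assumed-good baseline
    git bisect run sh -c 'make -j"$(nproc)" || exit 125; timeout 300 ./check-boot.sh'
    # exit 125 tells git bisect to skip commits that fail to build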
+> +Thanks, +> +Misbah Anjum N + diff --git a/classification_output/01/instruction/55961334 b/classification_output/01/instruction/55961334 new file mode 100644 index 000000000..80cdabd29 --- /dev/null +++ b/classification_output/01/instruction/55961334 @@ -0,0 +1,39 @@ +instruction: 0.803 +semantic: 0.775 +mistranslation: 0.718 +other: 0.715 + +[Bug] "-ht" flag ignored under KVM - guest still reports HT + +Hi Community, +We have observed that the 'ht' feature bit cannot be disabled when QEMU runs +with KVM acceleration. +qemu-system-x86_64 \ + --enable-kvm \ + -machine q35 \ + -cpu host,-ht \ + -smp 4 \ + -m 4G \ + -drive file=rootfs.img,format=raw \ + -nographic \ + -append 'console=ttyS0 root=/dev/sda rw' +Because '-ht' is specified, the guest should expose no HT capability +(cpuid.1.edx[28] = 0), and /proc/cpuinfo shouldn't show HT feature, but we still +saw ht in linux guest when run 'cat /proc/cpuinfo'. +XiaoYao mentioned that: + +It has been the behavior of QEMU since + + commit 400281af34e5ee6aa9f5496b53d8f82c6fef9319 + Author: Andre Przywara + Date: Wed Aug 19 15:42:42 2009 +0200 + + set CPUID bits to present cores and threads topology + +that we cannot remove HT CPUID bit from guest via "-cpu xxx,-ht" if the +VM has >= 2 vcpus. +I'd like to know whether there's a plan to address this issue, or if the current +behaviour is considered acceptable. +Best regards, +Ewan. + diff --git a/classification_output/01/instruction/5843372 b/classification_output/01/instruction/5843372 deleted file mode 100644 index 784962c9c..000000000 --- a/classification_output/01/instruction/5843372 +++ /dev/null @@ -1,2056 +0,0 @@ -instruction: 0.818 -other: 0.811 -semantic: 0.793 -mistranslation: 0.758 - -[BUG, RFC] Block graph deadlock on job-dismiss - -Hi all, - -There's a bug in block layer which leads to block graph deadlock. -Notably, it takes place when blockdev IO is processed within a separate -iothread. - -This was initially caught by our tests, and I was able to reduce it to a -relatively simple reproducer. Such deadlocks are probably supposed to -be covered in iotests/graph-changes-while-io, but this deadlock isn't. - -Basically what the reproducer does is launches QEMU with a drive having -'iothread' option set, creates a chain of 2 snapshots, launches -block-commit job for a snapshot and then dismisses the job, starting -from the lower snapshot. If the guest is issuing IO at the same time, -there's a race in acquiring block graph lock and a potential deadlock. - -Here's how it can be reproduced: - -1. Run QEMU: -> -SRCDIR=/path/to/srcdir -> -> -> -> -> -$SRCDIR/build/qemu-system-x86_64 -enable-kvm \ -> -> --machine q35 -cpu Nehalem \ -> -> --name guest=alma8-vm,debug-threads=on \ -> -> --m 2g -smp 2 \ -> -> --nographic -nodefaults \ -> -> --qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ -> -> --serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ -> -> --object iothread,id=iothread0 \ -> -> --blockdev -> -node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 -> -\ -> --device virtio-blk-pci,drive=disk,iothread=iothread0 -2. Launch IO (random reads) from within the guest: -> -nc -U /var/run/alma8-serial.sock -> -... -> -[root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 --bs=4k -> ---size=1G --numjobs=1 --time_based=1 --runtime=300 --group_reporting -> ---rw=randread --iodepth=1 --filename=/testfile -3. 
Run snapshots creation & removal of lower snapshot operation in a -loop (script attached): -> -while /bin/true ; do ./remove_lower_snap.sh ; done -And then it occasionally hangs. - -Note: I've tried bisecting this, and looks like deadlock occurs starting -from the following commit: - -(BAD) 5bdbaebcce virtio: Re-enable notifications after drain -(GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll - -On the latest v10.0.0 it does hang as well. - - -Here's backtrace of the main thread: - -> -#0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, -> -timeout=, sigmask=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:43 -> -#1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, -> -timeout=-1) at ../util/qemu-timer.c:329 -> -#2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, -> -ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 -> -#3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) at -> -../util/aio-posix.c:730 -> -#4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, -> -parent=0x0, poll=true) at ../block/io.c:378 -> -#5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at -> -../block/io.c:391 -> -#6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7682 -> -#7 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7964250, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7608 -> -#8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7668 -> -#9 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7e59110, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7608 -> -#10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7668 -> -#11 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb814ed80, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7608 -> -#12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../blockjob.c:157 -> -#13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb7c9d3f0, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7592 -> -#14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7661 -> -#15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx -> -(child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = -> -{...}, tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 -> -#16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb8565af0, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7592 -> -#17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, -> -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -errp=0x0) -> -at ../block.c:7661 -> -#18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context 
(bs=0x557eb79575e0, -> -ctx=0x557eb76c5f20, ignore_child=0x0, errp=0x0) at ../block.c:7715 -> -#19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) at -> -../block.c:3317 -> -#20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv (job=0x557eb7952800) at -> -../blockjob.c:209 -> -#21 0x0000557eb45ee641 in block_job_free (job=0x557eb7952800) at -> -../blockjob.c:82 -> -#22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at -> -../job.c:474 -> -#23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at -> -../job.c:771 -> -#24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, -> -errp=0x7ffd94b4f488) at ../job.c:783 -> ---Type for more, q to quit, c to continue without paging-- -> -#25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 "commit-snap1", -> -errp=0x7ffd94b4f488) at ../job-qmp.c:138 -> -#26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, -> -ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 -> -#27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at -> -../qapi/qmp-dispatch.c:128 -> -#28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at -> -../util/async.c:172 -> -#29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at -> -../util/async.c:219 -> -#30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at -> -../util/aio-posix.c:436 -> -#31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, -> -callback=0x0, user_data=0x0) at ../util/async.c:361 -> -#32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at -> -../glib/gmain.c:3364 -> -#33 g_main_context_dispatch (context=0x557eb76c6430) at ../glib/gmain.c:4079 -> -#34 0x0000557eb47d3ab1 in glib_pollfds_poll () at ../util/main-loop.c:287 -> -#35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at -> -../util/main-loop.c:310 -> -#36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at -> -../util/main-loop.c:589 -> -#37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 -> -#38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at -> -../system/main.c:50 -> -#39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at -> -../system/main.c:80 -And here's coroutine trying to acquire read lock: - -> -(gdb) qemu coroutine reader_queue->entries.sqh_first -> -#0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, -> -to_=0x7fc537fff508, action=COROUTINE_YIELD) at -> -../util/coroutine-ucontext.c:321 -> -#1 0x0000557eb47d4d4a in qemu_coroutine_yield () at -> -../util/qemu-coroutine.c:339 -> -#2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 -> -, lock=0x7fc53c57de50, flags=0) at -> -../util/qemu-coroutine-lock.c:60 -> -#3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at ../block/graph-lock.c:231 -> -#4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) at -> -/home/root/src/qemu/master/include/block/graph-lock.h:213 -> -#5 0x0000557eb460fa41 in blk_co_do_preadv_part -> -(blk=0x557eb84c0810, offset=6890553344, bytes=4096, qiov=0x7fc530006988, -> -qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at ../block/block-backend.c:1339 -> -#6 0x0000557eb46104d7 in blk_aio_read_entry (opaque=0x7fc530003240) at -> -../block/block-backend.c:1619 -> -#7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, i1=21886) at -> -../util/coroutine-ucontext.c:175 -> -#8 0x00007fc547c2a360 in __start_context () at -> -../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 -> -#9 0x00007ffd94b4ea40 in () -> -#10 
0x0000000000000000 in () -So it looks like main thread is processing job-dismiss request and is -holding write lock taken in block_job_remove_all_bdrv() (frame #20 -above). At the same time iothread spawns a coroutine which performs IO -request. Before the coroutine is spawned, blk_aio_prwv() increases -'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is -trying to acquire the read lock. But main thread isn't releasing the -lock as blk_root_drained_poll() returns true since blk->in_flight > 0. -Here's the deadlock. - -Any comments and suggestions on the subject are welcomed. Thanks! - -Andrey -remove_lower_snap.sh -Description: -application/shellscript - -On 4/24/25 8:32 PM, Andrey Drobyshev wrote: -> -Hi all, -> -> -There's a bug in block layer which leads to block graph deadlock. -> -Notably, it takes place when blockdev IO is processed within a separate -> -iothread. -> -> -This was initially caught by our tests, and I was able to reduce it to a -> -relatively simple reproducer. Such deadlocks are probably supposed to -> -be covered in iotests/graph-changes-while-io, but this deadlock isn't. -> -> -Basically what the reproducer does is launches QEMU with a drive having -> -'iothread' option set, creates a chain of 2 snapshots, launches -> -block-commit job for a snapshot and then dismisses the job, starting -> -from the lower snapshot. If the guest is issuing IO at the same time, -> -there's a race in acquiring block graph lock and a potential deadlock. -> -> -Here's how it can be reproduced: -> -> -[...] -> -I took a closer look at iotests/graph-changes-while-io, and have managed -to reproduce the same deadlock in a much simpler setup, without a guest. - -1. Run QSD:> ./build/storage-daemon/qemu-storage-daemon --object -iothread,id=iothread0 \ -> ---blockdev null-co,node-name=node0,read-zeroes=true \ -> -> ---nbd-server addr.type=unix,addr.path=/var/run/qsd_nbd.sock \ -> -> ---export -> -nbd,id=exp0,node-name=node0,iothread=iothread0,fixed-iothread=true,writable=true -> -\ -> ---chardev -> -socket,id=qmp-sock,path=/var/run/qsd_qmp.sock,server=on,wait=off \ -> ---monitor chardev=qmp-sock -2. Launch IO: -> -qemu-img bench -f raw -c 2000000 -> -'nbd+unix:///node0?socket=/var/run/qsd_nbd.sock' -3. Add 2 snapshots and remove lower one (script attached):> while -/bin/true ; do ./rls_qsd.sh ; done - -And then it hangs. - -I'll also send a patch with corresponding test case added directly to -iotests. - -This reproduce seems to be hanging starting from Fiona's commit -67446e605dc ("blockjob: drop AioContext lock before calling -bdrv_graph_wrlock()"). AioContext locks were dropped entirely later on -in Stefan's commit b49f4755c7 ("block: remove AioContext locking"), but -the problem remains. - -Andrey -rls_qsd.sh -Description: -application/shellscript - -From: Andrey Drobyshev - -This case is catching potential deadlock which takes place when job-dismiss -is issued when I/O requests are processed in a separate iothread. 
- -See -https://mail.gnu.org/archive/html/qemu-devel/2025-04/msg04421.html -Signed-off-by: Andrey Drobyshev ---- - .../qemu-iotests/tests/graph-changes-while-io | 101 ++++++++++++++++-- - .../tests/graph-changes-while-io.out | 4 +- - 2 files changed, 96 insertions(+), 9 deletions(-) - -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io -b/tests/qemu-iotests/tests/graph-changes-while-io -index 194fda500e..e30f823da4 100755 ---- a/tests/qemu-iotests/tests/graph-changes-while-io -+++ b/tests/qemu-iotests/tests/graph-changes-while-io -@@ -27,6 +27,8 @@ from iotests import imgfmt, qemu_img, qemu_img_create, -qemu_io, \ - - - top = os.path.join(iotests.test_dir, 'top.img') -+snap1 = os.path.join(iotests.test_dir, 'snap1.img') -+snap2 = os.path.join(iotests.test_dir, 'snap2.img') - nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') - - -@@ -58,6 +60,15 @@ class TestGraphChangesWhileIO(QMPTestCase): - def tearDown(self) -> None: - self.qsd.stop() - -+ def _wait_for_blockjob(self, status) -> None: -+ done = False -+ while not done: -+ for event in self.qsd.get_qmp().get_events(wait=10.0): -+ if event['event'] != 'JOB_STATUS_CHANGE': -+ continue -+ if event['data']['status'] == status: -+ done = True -+ - def test_blockdev_add_while_io(self) -> None: - # Run qemu-img bench in the background - bench_thr = Thread(target=do_qemu_img_bench) -@@ -116,13 +127,89 @@ class TestGraphChangesWhileIO(QMPTestCase): - 'device': 'job0', - }) - -- cancelled = False -- while not cancelled: -- for event in self.qsd.get_qmp().get_events(wait=10.0): -- if event['event'] != 'JOB_STATUS_CHANGE': -- continue -- if event['data']['status'] == 'null': -- cancelled = True -+ self._wait_for_blockjob('null') -+ -+ bench_thr.join() -+ -+ def test_remove_lower_snapshot_while_io(self) -> None: -+ # Run qemu-img bench in the background -+ bench_thr = Thread(target=do_qemu_img_bench, args=(100000, )) -+ bench_thr.start() -+ -+ # While I/O is performed on 'node0' node, consequently add 2 snapshots -+ # on top of it, then remove (commit) them starting from lower one. 
-+ while bench_thr.is_alive(): -+ # Recreate snapshot images on every iteration -+ qemu_img_create('-f', imgfmt, snap1, '1G') -+ qemu_img_create('-f', imgfmt, snap2, '1G') -+ -+ self.qsd.cmd('blockdev-add', { -+ 'driver': imgfmt, -+ 'node-name': 'snap1', -+ 'file': { -+ 'driver': 'file', -+ 'filename': snap1 -+ } -+ }) -+ -+ self.qsd.cmd('blockdev-snapshot', { -+ 'node': 'node0', -+ 'overlay': 'snap1', -+ }) -+ -+ self.qsd.cmd('blockdev-add', { -+ 'driver': imgfmt, -+ 'node-name': 'snap2', -+ 'file': { -+ 'driver': 'file', -+ 'filename': snap2 -+ } -+ }) -+ -+ self.qsd.cmd('blockdev-snapshot', { -+ 'node': 'snap1', -+ 'overlay': 'snap2', -+ }) -+ -+ self.qsd.cmd('block-commit', { -+ 'job-id': 'commit-snap1', -+ 'device': 'snap2', -+ 'top-node': 'snap1', -+ 'base-node': 'node0', -+ 'auto-finalize': True, -+ 'auto-dismiss': False, -+ }) -+ -+ self._wait_for_blockjob('concluded') -+ self.qsd.cmd('job-dismiss', { -+ 'id': 'commit-snap1', -+ }) -+ -+ self.qsd.cmd('block-commit', { -+ 'job-id': 'commit-snap2', -+ 'device': 'snap2', -+ 'top-node': 'snap2', -+ 'base-node': 'node0', -+ 'auto-finalize': True, -+ 'auto-dismiss': False, -+ }) -+ -+ self._wait_for_blockjob('ready') -+ self.qsd.cmd('job-complete', { -+ 'id': 'commit-snap2', -+ }) -+ -+ self._wait_for_blockjob('concluded') -+ self.qsd.cmd('job-dismiss', { -+ 'id': 'commit-snap2', -+ }) -+ -+ self.qsd.cmd('blockdev-del', { -+ 'node-name': 'snap1' -+ }) -+ self.qsd.cmd('blockdev-del', { -+ 'node-name': 'snap2' -+ }) - - bench_thr.join() - -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out -b/tests/qemu-iotests/tests/graph-changes-while-io.out -index fbc63e62f8..8d7e996700 100644 ---- a/tests/qemu-iotests/tests/graph-changes-while-io.out -+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out -@@ -1,5 +1,5 @@ --.. -+... - ---------------------------------------------------------------------- --Ran 2 tests -+Ran 3 tests - - OK --- -2.43.5 - -Am 24.04.25 um 19:32 schrieb Andrey Drobyshev: -> -So it looks like main thread is processing job-dismiss request and is -> -holding write lock taken in block_job_remove_all_bdrv() (frame #20 -> -above). At the same time iothread spawns a coroutine which performs IO -> -request. Before the coroutine is spawned, blk_aio_prwv() increases -> -'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is -> -trying to acquire the read lock. But main thread isn't releasing the -> -lock as blk_root_drained_poll() returns true since blk->in_flight > 0. -> -Here's the deadlock. -And for the IO test you provided, it's client->nb_requests that behaves -similarly to blk->in_flight here. - -The issue also reproduces easily when issuing the following QMP command -in a loop while doing IO on a device: - -> -void qmp_block_locked_drain(const char *node_name, Error **errp) -> -{ -> -BlockDriverState *bs; -> -> -bs = bdrv_find_node(node_name); -> -if (!bs) { -> -error_setg(errp, "node not found"); -> -return; -> -} -> -> -bdrv_graph_wrlock(); -> -bdrv_drained_begin(bs); -> -bdrv_drained_end(bs); -> -bdrv_graph_wrunlock(); -> -} -It seems like either it would be necessary to require: -1. not draining inside an exclusively locked section -or -2. making sure that variables used by drained_poll routines are only set -while holding the reader lock -? 
- -Those seem to require rather involved changes, so a third option might -be to make draining inside an exclusively locked section possible, by -embedding such locked sections in a drained section: - -> -diff --git a/blockjob.c b/blockjob.c -> -index 32007f31a9..9b2f3b3ea9 100644 -> ---- a/blockjob.c -> -+++ b/blockjob.c -> -@@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) -> -* one to make sure that such a concurrent access does not attempt -> -* to process an already freed BdrvChild. -> -*/ -> -+ bdrv_drain_all_begin(); -> -bdrv_graph_wrlock(); -> -while (job->nodes) { -> -GSList *l = job->nodes; -> -@@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) -> -g_slist_free_1(l); -> -} -> -bdrv_graph_wrunlock(); -> -+ bdrv_drain_all_end(); -> -} -> -> -bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) -This seems to fix the issue at hand. I can send a patch if this is -considered an acceptable approach. - -Best Regards, -Fiona - -On 4/30/25 11:47 AM, Fiona Ebner wrote: -> -Am 24.04.25 um 19:32 schrieb Andrey Drobyshev: -> -> So it looks like main thread is processing job-dismiss request and is -> -> holding write lock taken in block_job_remove_all_bdrv() (frame #20 -> -> above). At the same time iothread spawns a coroutine which performs IO -> -> request. Before the coroutine is spawned, blk_aio_prwv() increases -> -> 'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is -> -> trying to acquire the read lock. But main thread isn't releasing the -> -> lock as blk_root_drained_poll() returns true since blk->in_flight > 0. -> -> Here's the deadlock. -> -> -And for the IO test you provided, it's client->nb_requests that behaves -> -similarly to blk->in_flight here. -> -> -The issue also reproduces easily when issuing the following QMP command -> -in a loop while doing IO on a device: -> -> -> void qmp_block_locked_drain(const char *node_name, Error **errp) -> -> { -> -> BlockDriverState *bs; -> -> -> -> bs = bdrv_find_node(node_name); -> -> if (!bs) { -> -> error_setg(errp, "node not found"); -> -> return; -> -> } -> -> -> -> bdrv_graph_wrlock(); -> -> bdrv_drained_begin(bs); -> -> bdrv_drained_end(bs); -> -> bdrv_graph_wrunlock(); -> -> } -> -> -It seems like either it would be necessary to require: -> -1. not draining inside an exclusively locked section -> -or -> -2. making sure that variables used by drained_poll routines are only set -> -while holding the reader lock -> -? -> -> -Those seem to require rather involved changes, so a third option might -> -be to make draining inside an exclusively locked section possible, by -> -embedding such locked sections in a drained section: -> -> -> diff --git a/blockjob.c b/blockjob.c -> -> index 32007f31a9..9b2f3b3ea9 100644 -> -> --- a/blockjob.c -> -> +++ b/blockjob.c -> -> @@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) -> -> * one to make sure that such a concurrent access does not attempt -> -> * to process an already freed BdrvChild. -> -> */ -> -> + bdrv_drain_all_begin(); -> -> bdrv_graph_wrlock(); -> -> while (job->nodes) { -> -> GSList *l = job->nodes; -> -> @@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) -> -> g_slist_free_1(l); -> -> } -> -> bdrv_graph_wrunlock(); -> -> + bdrv_drain_all_end(); -> -> } -> -> -> -> bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) -> -> -This seems to fix the issue at hand. I can send a patch if this is -> -considered an acceptable approach. 
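(Distilled, the diff above encodes a simple ordering rule. The sketch
below only illustrates that rule, it is not the actual patch, and
detach_all_children() is a made-up placeholder for the loop over
job->nodes shown above.)

    /* Quiesce I/O before taking the graph writer lock: once requests have
     * drained, no coroutine can sit on blk->in_flight waiting for the
     * reader lock, so the drained_poll callbacks cannot keep the main
     * loop spinning while the writer lock is held. */
    bdrv_drain_all_begin();
    bdrv_graph_wrlock();
    detach_all_children(job);     /* placeholder for the job->nodes loop */
    bdrv_graph_wrunlock();
    bdrv_drain_all_end();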
-> -> -Best Regards, -> -Fiona -> -Hello Fiona, - -Thanks for looking into it. I've tried your 3rd option above and can -confirm it does fix the deadlock, at least I can't reproduce it. Other -iotests also don't seem to be breaking. So I personally am fine with -that patch. Would be nice to hear a word from the maintainers though on -whether there're any caveats with such approach. - -Andrey - -On Wed, Apr 30, 2025 at 10:11 AM Andrey Drobyshev - wrote: -> -> -On 4/30/25 11:47 AM, Fiona Ebner wrote: -> -> Am 24.04.25 um 19:32 schrieb Andrey Drobyshev: -> ->> So it looks like main thread is processing job-dismiss request and is -> ->> holding write lock taken in block_job_remove_all_bdrv() (frame #20 -> ->> above). At the same time iothread spawns a coroutine which performs IO -> ->> request. Before the coroutine is spawned, blk_aio_prwv() increases -> ->> 'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is -> ->> trying to acquire the read lock. But main thread isn't releasing the -> ->> lock as blk_root_drained_poll() returns true since blk->in_flight > 0. -> ->> Here's the deadlock. -> -> -> -> And for the IO test you provided, it's client->nb_requests that behaves -> -> similarly to blk->in_flight here. -> -> -> -> The issue also reproduces easily when issuing the following QMP command -> -> in a loop while doing IO on a device: -> -> -> ->> void qmp_block_locked_drain(const char *node_name, Error **errp) -> ->> { -> ->> BlockDriverState *bs; -> ->> -> ->> bs = bdrv_find_node(node_name); -> ->> if (!bs) { -> ->> error_setg(errp, "node not found"); -> ->> return; -> ->> } -> ->> -> ->> bdrv_graph_wrlock(); -> ->> bdrv_drained_begin(bs); -> ->> bdrv_drained_end(bs); -> ->> bdrv_graph_wrunlock(); -> ->> } -> -> -> -> It seems like either it would be necessary to require: -> -> 1. not draining inside an exclusively locked section -> -> or -> -> 2. making sure that variables used by drained_poll routines are only set -> -> while holding the reader lock -> -> ? -> -> -> -> Those seem to require rather involved changes, so a third option might -> -> be to make draining inside an exclusively locked section possible, by -> -> embedding such locked sections in a drained section: -> -> -> ->> diff --git a/blockjob.c b/blockjob.c -> ->> index 32007f31a9..9b2f3b3ea9 100644 -> ->> --- a/blockjob.c -> ->> +++ b/blockjob.c -> ->> @@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) -> ->> * one to make sure that such a concurrent access does not attempt -> ->> * to process an already freed BdrvChild. -> ->> */ -> ->> + bdrv_drain_all_begin(); -> ->> bdrv_graph_wrlock(); -> ->> while (job->nodes) { -> ->> GSList *l = job->nodes; -> ->> @@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) -> ->> g_slist_free_1(l); -> ->> } -> ->> bdrv_graph_wrunlock(); -> ->> + bdrv_drain_all_end(); -> ->> } -> ->> -> ->> bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) -> -> -> -> This seems to fix the issue at hand. I can send a patch if this is -> -> considered an acceptable approach. -Kevin is aware of this thread but it's a public holiday tomorrow so it -may be a little longer. - -Stefan - -Am 24.04.2025 um 19:32 hat Andrey Drobyshev geschrieben: -> -Hi all, -> -> -There's a bug in block layer which leads to block graph deadlock. -> -Notably, it takes place when blockdev IO is processed within a separate -> -iothread. -> -> -This was initially caught by our tests, and I was able to reduce it to a -> -relatively simple reproducer. 
Such deadlocks are probably supposed to -> -be covered in iotests/graph-changes-while-io, but this deadlock isn't. -> -> -Basically what the reproducer does is launches QEMU with a drive having -> -'iothread' option set, creates a chain of 2 snapshots, launches -> -block-commit job for a snapshot and then dismisses the job, starting -> -from the lower snapshot. If the guest is issuing IO at the same time, -> -there's a race in acquiring block graph lock and a potential deadlock. -> -> -Here's how it can be reproduced: -> -> -1. Run QEMU: -> -> SRCDIR=/path/to/srcdir -> -> -> -> -> -> -> -> -> -> $SRCDIR/build/qemu-system-x86_64 -enable-kvm \ -> -> -> -> -machine q35 -cpu Nehalem \ -> -> -> -> -name guest=alma8-vm,debug-threads=on \ -> -> -> -> -m 2g -smp 2 \ -> -> -> -> -nographic -nodefaults \ -> -> -> -> -qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ -> -> -> -> -serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ -> -> -> -> -object iothread,id=iothread0 \ -> -> -> -> -blockdev -> -> node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 -> -> \ -> -> -device virtio-blk-pci,drive=disk,iothread=iothread0 -> -> -2. Launch IO (random reads) from within the guest: -> -> nc -U /var/run/alma8-serial.sock -> -> ... -> -> [root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 --bs=4k -> -> --size=1G --numjobs=1 --time_based=1 --runtime=300 --group_reporting -> -> --rw=randread --iodepth=1 --filename=/testfile -> -> -3. Run snapshots creation & removal of lower snapshot operation in a -> -loop (script attached): -> -> while /bin/true ; do ./remove_lower_snap.sh ; done -> -> -And then it occasionally hangs. -> -> -Note: I've tried bisecting this, and looks like deadlock occurs starting -> -from the following commit: -> -> -(BAD) 5bdbaebcce virtio: Re-enable notifications after drain -> -(GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll -> -> -On the latest v10.0.0 it does hang as well. 
-> -> -> -Here's backtrace of the main thread: -> -> -> #0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, -> -> timeout=, sigmask=0x0) at -> -> ../sysdeps/unix/sysv/linux/ppoll.c:43 -> -> #1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, -> -> timeout=-1) at ../util/qemu-timer.c:329 -> -> #2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, -> -> ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 -> -> #3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) at -> -> ../util/aio-posix.c:730 -> -> #4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, -> -> parent=0x0, poll=true) at ../block/io.c:378 -> -> #5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at -> -> ../block/io.c:391 -> -> #6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7682 -> -> #7 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7964250, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7608 -> -> #8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7668 -> -> #9 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7e59110, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7608 -> -> #10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7668 -> -> #11 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb814ed80, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7608 -> -> #12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../blockjob.c:157 -> -> #13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb7c9d3f0, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7592 -> -> #14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7661 -> -> #15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx -> -> (child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = -> -> {...}, tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 -> -> #16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb8565af0, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7592 -> -> #17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, -> -> ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -> -> errp=0x0) -> -> at ../block.c:7661 -> -> #18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context (bs=0x557eb79575e0, -> -> ctx=0x557eb76c5f20, ignore_child=0x0, errp=0x0) at ../block.c:7715 -> -> #19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) at -> -> ../block.c:3317 -> -> #20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv (job=0x557eb7952800) at -> -> ../blockjob.c:209 -> -> #21 0x0000557eb45ee641 in 
block_job_free (job=0x557eb7952800) at -> -> ../blockjob.c:82 -> -> #22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at -> -> ../job.c:474 -> -> #23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at -> -> ../job.c:771 -> -> #24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, -> -> errp=0x7ffd94b4f488) at ../job.c:783 -> -> --Type for more, q to quit, c to continue without paging-- -> -> #25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 -> -> "commit-snap1", errp=0x7ffd94b4f488) at ../job-qmp.c:138 -> -> #26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, -> -> ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 -> -> #27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at -> -> ../qapi/qmp-dispatch.c:128 -> -> #28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at -> -> ../util/async.c:172 -> -> #29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at -> -> ../util/async.c:219 -> -> #30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at -> -> ../util/aio-posix.c:436 -> -> #31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, -> -> callback=0x0, user_data=0x0) at ../util/async.c:361 -> -> #32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at -> -> ../glib/gmain.c:3364 -> -> #33 g_main_context_dispatch (context=0x557eb76c6430) at ../glib/gmain.c:4079 -> -> #34 0x0000557eb47d3ab1 in glib_pollfds_poll () at ../util/main-loop.c:287 -> -> #35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at -> -> ../util/main-loop.c:310 -> -> #36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at -> -> ../util/main-loop.c:589 -> -> #37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 -> -> #38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at -> -> ../system/main.c:50 -> -> #39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at -> -> ../system/main.c:80 -> -> -> -And here's coroutine trying to acquire read lock: -> -> -> (gdb) qemu coroutine reader_queue->entries.sqh_first -> -> #0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, -> -> to_=0x7fc537fff508, action=COROUTINE_YIELD) at -> -> ../util/coroutine-ucontext.c:321 -> -> #1 0x0000557eb47d4d4a in qemu_coroutine_yield () at -> -> ../util/qemu-coroutine.c:339 -> -> #2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 -> -> , lock=0x7fc53c57de50, flags=0) at -> -> ../util/qemu-coroutine-lock.c:60 -> -> #3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at -> -> ../block/graph-lock.c:231 -> -> #4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) at -> -> /home/root/src/qemu/master/include/block/graph-lock.h:213 -> -> #5 0x0000557eb460fa41 in blk_co_do_preadv_part -> -> (blk=0x557eb84c0810, offset=6890553344, bytes=4096, -> -> qiov=0x7fc530006988, qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at -> -> ../block/block-backend.c:1339 -> -> #6 0x0000557eb46104d7 in blk_aio_read_entry (opaque=0x7fc530003240) at -> -> ../block/block-backend.c:1619 -> -> #7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, i1=21886) -> -> at ../util/coroutine-ucontext.c:175 -> -> #8 0x00007fc547c2a360 in __start_context () at -> -> ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 -> -> #9 0x00007ffd94b4ea40 in () -> -> #10 0x0000000000000000 in () -> -> -> -So it looks like main thread is processing job-dismiss request and is -> -holding write lock taken in block_job_remove_all_bdrv() (frame #20 -> -above). 
At the same time iothread spawns a coroutine which performs IO -> -request. Before the coroutine is spawned, blk_aio_prwv() increases -> -'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is -> -trying to acquire the read lock. But main thread isn't releasing the -> -lock as blk_root_drained_poll() returns true since blk->in_flight > 0. -> -Here's the deadlock. -> -> -Any comments and suggestions on the subject are welcomed. Thanks! -I think this is what the blk_wait_while_drained() call was supposed to -address in blk_co_do_preadv_part(). However, with the use of multiple -I/O threads, this is racy. - -Do you think that in your case we hit the small race window between the -checks in blk_wait_while_drained() and GRAPH_RDLOCK_GUARD()? Or is there -another reason why blk_wait_while_drained() didn't do its job? - -Kevin - -On 5/2/25 19:34, Kevin Wolf wrote: -Am 24.04.2025 um 19:32 hat Andrey Drobyshev geschrieben: -Hi all, - -There's a bug in block layer which leads to block graph deadlock. -Notably, it takes place when blockdev IO is processed within a separate -iothread. - -This was initially caught by our tests, and I was able to reduce it to a -relatively simple reproducer. Such deadlocks are probably supposed to -be covered in iotests/graph-changes-while-io, but this deadlock isn't. - -Basically what the reproducer does is launches QEMU with a drive having -'iothread' option set, creates a chain of 2 snapshots, launches -block-commit job for a snapshot and then dismisses the job, starting -from the lower snapshot. If the guest is issuing IO at the same time, -there's a race in acquiring block graph lock and a potential deadlock. - -Here's how it can be reproduced: - -1. Run QEMU: -SRCDIR=/path/to/srcdir -$SRCDIR/build/qemu-system-x86_64 -enable-kvm \ --machine q35 -cpu Nehalem \ - -name guest=alma8-vm,debug-threads=on \ - -m 2g -smp 2 \ - -nographic -nodefaults \ - -qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ - -serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ - -object iothread,id=iothread0 \ - -blockdev -node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 - \ - -device virtio-blk-pci,drive=disk,iothread=iothread0 -2. Launch IO (random reads) from within the guest: -nc -U /var/run/alma8-serial.sock -... -[root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 --bs=4k ---size=1G --numjobs=1 --time_based=1 --runtime=300 --group_reporting ---rw=randread --iodepth=1 --filename=/testfile -3. Run snapshots creation & removal of lower snapshot operation in a -loop (script attached): -while /bin/true ; do ./remove_lower_snap.sh ; done -And then it occasionally hangs. - -Note: I've tried bisecting this, and looks like deadlock occurs starting -from the following commit: - -(BAD) 5bdbaebcce virtio: Re-enable notifications after drain -(GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll - -On the latest v10.0.0 it does hang as well. 
- - -Here's backtrace of the main thread: -#0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, timeout=, sigmask=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:43 -#1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, timeout=-1) -at ../util/qemu-timer.c:329 -#2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, -ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 -#3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) at -../util/aio-posix.c:730 -#4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, parent=0x0, -poll=true) at ../block/io.c:378 -#5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at -../block/io.c:391 -#6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7682 -#7 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7964250, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7608 -#8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7668 -#9 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb7e59110, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7608 -#10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7668 -#11 0x0000557eb45ebf2b in bdrv_child_change_aio_context (c=0x557eb814ed80, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7608 -#12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../blockjob.c:157 -#13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb7c9d3f0, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7592 -#14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7661 -#15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx - (child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 -#16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context (c=0x557eb8565af0, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7592 -#17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, -ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, tran=0x557eb7a87160, -errp=0x0) - at ../block.c:7661 -#18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context (bs=0x557eb79575e0, -ctx=0x557eb76c5f20, ignore_child=0x0, errp=0x0) at ../block.c:7715 -#19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) at -../block.c:3317 -#20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv (job=0x557eb7952800) at -../blockjob.c:209 -#21 0x0000557eb45ee641 in block_job_free (job=0x557eb7952800) at -../blockjob.c:82 -#22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at ../job.c:474 -#23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at -../job.c:771 -#24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, -errp=0x7ffd94b4f488) at ../job.c:783 
---Type for more, q to quit, c to continue without paging-- -#25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 "commit-snap1", -errp=0x7ffd94b4f488) at ../job-qmp.c:138 -#26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, -ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 -#27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at -../qapi/qmp-dispatch.c:128 -#28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at ../util/async.c:172 -#29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at -../util/async.c:219 -#30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at -../util/aio-posix.c:436 -#31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, -callback=0x0, user_data=0x0) at ../util/async.c:361 -#32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at -../glib/gmain.c:3364 -#33 g_main_context_dispatch (context=0x557eb76c6430) at ../glib/gmain.c:4079 -#34 0x0000557eb47d3ab1 in glib_pollfds_poll () at ../util/main-loop.c:287 -#35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at -../util/main-loop.c:310 -#36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at -../util/main-loop.c:589 -#37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 -#38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at ../system/main.c:50 -#39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at -../system/main.c:80 -And here's coroutine trying to acquire read lock: -(gdb) qemu coroutine reader_queue->entries.sqh_first -#0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, -to_=0x7fc537fff508, action=COROUTINE_YIELD) at ../util/coroutine-ucontext.c:321 -#1 0x0000557eb47d4d4a in qemu_coroutine_yield () at -../util/qemu-coroutine.c:339 -#2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 -, lock=0x7fc53c57de50, flags=0) at -../util/qemu-coroutine-lock.c:60 -#3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at ../block/graph-lock.c:231 -#4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) at -/home/root/src/qemu/master/include/block/graph-lock.h:213 -#5 0x0000557eb460fa41 in blk_co_do_preadv_part - (blk=0x557eb84c0810, offset=6890553344, bytes=4096, qiov=0x7fc530006988, -qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at ../block/block-backend.c:1339 -#6 0x0000557eb46104d7 in blk_aio_read_entry (opaque=0x7fc530003240) at -../block/block-backend.c:1619 -#7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, i1=21886) at -../util/coroutine-ucontext.c:175 -#8 0x00007fc547c2a360 in __start_context () at -../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 -#9 0x00007ffd94b4ea40 in () -#10 0x0000000000000000 in () -So it looks like main thread is processing job-dismiss request and is -holding write lock taken in block_job_remove_all_bdrv() (frame #20 -above). At the same time iothread spawns a coroutine which performs IO -request. Before the coroutine is spawned, blk_aio_prwv() increases -'in_flight' counter for Blk. Then blk_co_do_preadv_part() (frame #5) is -trying to acquire the read lock. But main thread isn't releasing the -lock as blk_root_drained_poll() returns true since blk->in_flight > 0. -Here's the deadlock. - -Any comments and suggestions on the subject are welcomed. Thanks! -I think this is what the blk_wait_while_drained() call was supposed to -address in blk_co_do_preadv_part(). However, with the use of multiple -I/O threads, this is racy. 
- -Do you think that in your case we hit the small race window between the -checks in blk_wait_while_drained() and GRAPH_RDLOCK_GUARD()? Or is there -another reason why blk_wait_while_drained() didn't do its job? - -Kevin -At my opinion there is very big race window. Main thread has -eaten graph write lock. After that another coroutine is stalled -within GRAPH_RDLOCK_GUARD() as there is no drain at the moment and only -after that main thread has started drain. That is why Fiona's idea is -looking working. Though this would mean that normally we should always -do that at the moment when we acquire write lock. May be even inside -this function. Den - -Am 02.05.2025 um 19:52 hat Denis V. Lunev geschrieben: -> -On 5/2/25 19:34, Kevin Wolf wrote: -> -> Am 24.04.2025 um 19:32 hat Andrey Drobyshev geschrieben: -> -> > Hi all, -> -> > -> -> > There's a bug in block layer which leads to block graph deadlock. -> -> > Notably, it takes place when blockdev IO is processed within a separate -> -> > iothread. -> -> > -> -> > This was initially caught by our tests, and I was able to reduce it to a -> -> > relatively simple reproducer. Such deadlocks are probably supposed to -> -> > be covered in iotests/graph-changes-while-io, but this deadlock isn't. -> -> > -> -> > Basically what the reproducer does is launches QEMU with a drive having -> -> > 'iothread' option set, creates a chain of 2 snapshots, launches -> -> > block-commit job for a snapshot and then dismisses the job, starting -> -> > from the lower snapshot. If the guest is issuing IO at the same time, -> -> > there's a race in acquiring block graph lock and a potential deadlock. -> -> > -> -> > Here's how it can be reproduced: -> -> > -> -> > 1. Run QEMU: -> -> > > SRCDIR=/path/to/srcdir -> -> > > $SRCDIR/build/qemu-system-x86_64 -enable-kvm \ -> -> > > -machine q35 -cpu Nehalem \ -> -> > > -name guest=alma8-vm,debug-threads=on \ -> -> > > -m 2g -smp 2 \ -> -> > > -nographic -nodefaults \ -> -> > > -qmp unix:/var/run/alma8-qmp.sock,server=on,wait=off \ -> -> > > -serial unix:/var/run/alma8-serial.sock,server=on,wait=off \ -> -> > > -object iothread,id=iothread0 \ -> -> > > -blockdev -> -> > > node-name=disk,driver=qcow2,file.driver=file,file.filename=/path/to/img/alma8.qcow2 -> -> > > \ -> -> > > -device virtio-blk-pci,drive=disk,iothread=iothread0 -> -> > 2. Launch IO (random reads) from within the guest: -> -> > > nc -U /var/run/alma8-serial.sock -> -> > > ... -> -> > > [root@alma8-vm ~]# fio --name=randread --ioengine=libaio --direct=1 -> -> > > --bs=4k --size=1G --numjobs=1 --time_based=1 --runtime=300 -> -> > > --group_reporting --rw=randread --iodepth=1 --filename=/testfile -> -> > 3. Run snapshots creation & removal of lower snapshot operation in a -> -> > loop (script attached): -> -> > > while /bin/true ; do ./remove_lower_snap.sh ; done -> -> > And then it occasionally hangs. -> -> > -> -> > Note: I've tried bisecting this, and looks like deadlock occurs starting -> -> > from the following commit: -> -> > -> -> > (BAD) 5bdbaebcce virtio: Re-enable notifications after drain -> -> > (GOOD) c42c3833e0 virtio-scsi: Attach event vq notifier with no_poll -> -> > -> -> > On the latest v10.0.0 it does hang as well. 
-> -> > -> -> > -> -> > Here's backtrace of the main thread: -> -> > -> -> > > #0 0x00007fc547d427ce in __ppoll (fds=0x557eb79657b0, nfds=1, -> -> > > timeout=, sigmask=0x0) at -> -> > > ../sysdeps/unix/sysv/linux/ppoll.c:43 -> -> > > #1 0x0000557eb47d955c in qemu_poll_ns (fds=0x557eb79657b0, nfds=1, -> -> > > timeout=-1) at ../util/qemu-timer.c:329 -> -> > > #2 0x0000557eb47b2204 in fdmon_poll_wait (ctx=0x557eb76c5f20, -> -> > > ready_list=0x7ffd94b4edd8, timeout=-1) at ../util/fdmon-poll.c:79 -> -> > > #3 0x0000557eb47b1c45 in aio_poll (ctx=0x557eb76c5f20, blocking=true) -> -> > > at ../util/aio-posix.c:730 -> -> > > #4 0x0000557eb4621edd in bdrv_do_drained_begin (bs=0x557eb795e950, -> -> > > parent=0x0, poll=true) at ../block/io.c:378 -> -> > > #5 0x0000557eb4621f7b in bdrv_drained_begin (bs=0x557eb795e950) at -> -> > > ../block/io.c:391 -> -> > > #6 0x0000557eb45ec125 in bdrv_change_aio_context (bs=0x557eb795e950, -> -> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7682 -> -> > > #7 0x0000557eb45ebf2b in bdrv_child_change_aio_context -> -> > > (c=0x557eb7964250, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7608 -> -> > > #8 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb79575e0, -> -> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7668 -> -> > > #9 0x0000557eb45ebf2b in bdrv_child_change_aio_context -> -> > > (c=0x557eb7e59110, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7608 -> -> > > #10 0x0000557eb45ec0c4 in bdrv_change_aio_context (bs=0x557eb7e51960, -> -> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7668 -> -> > > #11 0x0000557eb45ebf2b in bdrv_child_change_aio_context -> -> > > (c=0x557eb814ed80, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7608 -> -> > > #12 0x0000557eb45ee8e4 in child_job_change_aio_ctx (c=0x557eb7c9d3f0, -> -> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../blockjob.c:157 -> -> > > #13 0x0000557eb45ebe2d in bdrv_parent_change_aio_context -> -> > > (c=0x557eb7c9d3f0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7592 -> -> > > #14 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb7d74310, -> -> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7661 -> -> > > #15 0x0000557eb45dcd7e in bdrv_child_cb_change_aio_ctx -> -> > > (child=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 -> -> > > = {...}, tran=0x557eb7a87160, errp=0x0) at ../block.c:1234 -> -> > > #16 0x0000557eb45ebe2d in bdrv_parent_change_aio_context -> -> > > (c=0x557eb8565af0, ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7592 -> -> > > #17 0x0000557eb45ec06b in bdrv_change_aio_context (bs=0x557eb79575e0, -> -> > > ctx=0x557eb76c5f20, visited=0x557eb7e06b60 = {...}, -> -> > > tran=0x557eb7a87160, errp=0x0) -> -> > > at ../block.c:7661 -> -> > > #18 0x0000557eb45ec1f3 in bdrv_try_change_aio_context -> -> > > (bs=0x557eb79575e0, ctx=0x557eb76c5f20, ignore_child=0x0, 
errp=0x0) at -> -> > > ../block.c:7715 -> -> > > #19 0x0000557eb45e1b15 in bdrv_root_unref_child (child=0x557eb7966f30) -> -> > > at ../block.c:3317 -> -> > > #20 0x0000557eb45eeaa8 in block_job_remove_all_bdrv -> -> > > (job=0x557eb7952800) at ../blockjob.c:209 -> -> > > #21 0x0000557eb45ee641 in block_job_free (job=0x557eb7952800) at -> -> > > ../blockjob.c:82 -> -> > > #22 0x0000557eb45f17af in job_unref_locked (job=0x557eb7952800) at -> -> > > ../job.c:474 -> -> > > #23 0x0000557eb45f257d in job_do_dismiss_locked (job=0x557eb7952800) at -> -> > > ../job.c:771 -> -> > > #24 0x0000557eb45f25fe in job_dismiss_locked (jobptr=0x7ffd94b4f400, -> -> > > errp=0x7ffd94b4f488) at ../job.c:783 -> -> > > --Type for more, q to quit, c to continue without paging-- -> -> > > #25 0x0000557eb45d8e84 in qmp_job_dismiss (id=0x557eb7aa42b0 -> -> > > "commit-snap1", errp=0x7ffd94b4f488) at ../job-qmp.c:138 -> -> > > #26 0x0000557eb472f6a3 in qmp_marshal_job_dismiss (args=0x7fc52c00a3b0, -> -> > > ret=0x7fc53c880da8, errp=0x7fc53c880da0) at qapi/qapi-commands-job.c:221 -> -> > > #27 0x0000557eb47a35f3 in do_qmp_dispatch_bh (opaque=0x7fc53c880e40) at -> -> > > ../qapi/qmp-dispatch.c:128 -> -> > > #28 0x0000557eb47d1cd2 in aio_bh_call (bh=0x557eb79568f0) at -> -> > > ../util/async.c:172 -> -> > > #29 0x0000557eb47d1df5 in aio_bh_poll (ctx=0x557eb76c0200) at -> -> > > ../util/async.c:219 -> -> > > #30 0x0000557eb47b12f3 in aio_dispatch (ctx=0x557eb76c0200) at -> -> > > ../util/aio-posix.c:436 -> -> > > #31 0x0000557eb47d2266 in aio_ctx_dispatch (source=0x557eb76c0200, -> -> > > callback=0x0, user_data=0x0) at ../util/async.c:361 -> -> > > #32 0x00007fc549232f4f in g_main_dispatch (context=0x557eb76c6430) at -> -> > > ../glib/gmain.c:3364 -> -> > > #33 g_main_context_dispatch (context=0x557eb76c6430) at -> -> > > ../glib/gmain.c:4079 -> -> > > #34 0x0000557eb47d3ab1 in glib_pollfds_poll () at -> -> > > ../util/main-loop.c:287 -> -> > > #35 0x0000557eb47d3b38 in os_host_main_loop_wait (timeout=0) at -> -> > > ../util/main-loop.c:310 -> -> > > #36 0x0000557eb47d3c58 in main_loop_wait (nonblocking=0) at -> -> > > ../util/main-loop.c:589 -> -> > > #37 0x0000557eb4218b01 in qemu_main_loop () at ../system/runstate.c:835 -> -> > > #38 0x0000557eb46df166 in qemu_default_main (opaque=0x0) at -> -> > > ../system/main.c:50 -> -> > > #39 0x0000557eb46df215 in main (argc=24, argv=0x7ffd94b4f8d8) at -> -> > > ../system/main.c:80 -> -> > -> -> > And here's coroutine trying to acquire read lock: -> -> > -> -> > > (gdb) qemu coroutine reader_queue->entries.sqh_first -> -> > > #0 0x0000557eb47d7068 in qemu_coroutine_switch (from_=0x557eb7aa48b0, -> -> > > to_=0x7fc537fff508, action=COROUTINE_YIELD) at -> -> > > ../util/coroutine-ucontext.c:321 -> -> > > #1 0x0000557eb47d4d4a in qemu_coroutine_yield () at -> -> > > ../util/qemu-coroutine.c:339 -> -> > > #2 0x0000557eb47d56c8 in qemu_co_queue_wait_impl (queue=0x557eb59954c0 -> -> > > , lock=0x7fc53c57de50, flags=0) at -> -> > > ../util/qemu-coroutine-lock.c:60 -> -> > > #3 0x0000557eb461fea7 in bdrv_graph_co_rdlock () at -> -> > > ../block/graph-lock.c:231 -> -> > > #4 0x0000557eb460c81a in graph_lockable_auto_lock (x=0x7fc53c57dee3) -> -> > > at /home/root/src/qemu/master/include/block/graph-lock.h:213 -> -> > > #5 0x0000557eb460fa41 in blk_co_do_preadv_part -> -> > > (blk=0x557eb84c0810, offset=6890553344, bytes=4096, -> -> > > qiov=0x7fc530006988, qiov_offset=0, flags=BDRV_REQ_REGISTERED_BUF) at -> -> > > ../block/block-backend.c:1339 -> -> > > #6 0x0000557eb46104d7 in 
blk_aio_read_entry (opaque=0x7fc530003240) at -> -> > ../block/block-backend.c:1619 -> -> > #7 0x0000557eb47d6c40 in coroutine_trampoline (i0=-1213577040, -> -> > i1=21886) at ../util/coroutine-ucontext.c:175 -> -> > #8 0x00007fc547c2a360 in __start_context () at -> -> > ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 -> -> > #9 0x00007ffd94b4ea40 in () -> -> > #10 0x0000000000000000 in () -> -> > -> -> > So it looks like the main thread is processing a job-dismiss request and is -> -> > holding the write lock taken in block_job_remove_all_bdrv() (frame #20 -> -> > above). At the same time the iothread spawns a coroutine which performs an IO -> -> > request. Before the coroutine is spawned, blk_aio_prwv() increases the -> -> > 'in_flight' counter for the Blk. Then blk_co_do_preadv_part() (frame #5) -> -> > tries to acquire the read lock. But the main thread isn't releasing the -> -> > lock, as blk_root_drained_poll() returns true since blk->in_flight > 0. -> -> > Here's the deadlock. -> -> > -> -> > Any comments and suggestions on the subject are welcome. Thanks! -> -> I think this is what the blk_wait_while_drained() call was supposed to -> -> address in blk_co_do_preadv_part(). However, with the use of multiple -> -> I/O threads, this is racy. -> -> -> -> Do you think that in your case we hit the small race window between the -> -> checks in blk_wait_while_drained() and GRAPH_RDLOCK_GUARD()? Or is there -> -> another reason why blk_wait_while_drained() didn't do its job? -> -> -> -In my opinion the race window is quite big. The main thread has -> -taken the graph write lock. After that another coroutine is stalled -> -within GRAPH_RDLOCK_GUARD(), as there is no drain at that moment, and only -> -after that does the main thread start the drain. -You're right, I confused taking the write lock with draining there. - -> -That is why Fiona's idea looks like it works. Though this would mean -> -that normally we should always do that at the moment when we acquire -> -the write lock. Maybe even inside this function. -I actually see now that not all of my graph locking patches were merged. -At least I did have the thought that bdrv_drained_begin() must be marked -GRAPH_UNLOCKED because it polls. That means that calling it from inside -bdrv_try_change_aio_context() is actually forbidden (and that's the part -I didn't see back then because it doesn't have TSA annotations). - -If you refactor the code to move the drain out to before the lock is -taken, I think you end up with Fiona's patch, except you'll remove the -forbidden inner drain and add more annotations for some functions and -clarify the rules around them. I don't know, but I wouldn't be surprised -if along the way we find other bugs, too. - -So Fiona's drain looks right to me, but we should probably approach it -more systematically.
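As a rough illustration of that ordering (quiesce first, only then take the graph write lock), a sketch could look like the following. The function names are those of the QEMU block layer, but the exact signatures and the surrounding refactoring are assumptions here, not Fiona's actual patch.

    /* Sketch only: drain while unlocked, then modify the graph under the
     * write lock. bdrv_drained_begin() polls, so it must run GRAPH_UNLOCKED;
     * signatures are assumed and vary between QEMU versions. */
    static void remove_child_drained_first(BlockDriverState *bs, BdrvChild *child)
    {
        bdrv_drained_begin(bs);        /* quiesce I/O with no lock held */

        bdrv_graph_wrlock();           /* no reader can be stuck behind us now */
        bdrv_unref_child(bs, child);   /* the actual graph modification */
        bdrv_graph_wrunlock();

        bdrv_drained_end(bs);          /* resume I/O */
    }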
- -Kevin - diff --git a/classification_output/01/instruction/6117378 b/classification_output/01/instruction/6117378 deleted file mode 100644 index 5dad058d5..000000000 --- a/classification_output/01/instruction/6117378 +++ /dev/null @@ -1,31 +0,0 @@ -instruction: 0.693 -mistranslation: 0.533 -other: 0.519 -semantic: 0.454 - -[Qemu-devel] [BUG] network : windows os lost ip address of the network card in some cases - -We have seen this problem for a long time. For example, if we have three network -cards in the virtual XML file, such as "network connection 1" / "network connection -2" / "network connection 3". - -Each network card has its own IP address, such as 192.168.1.1 / 2.1 / 3.1. When we -delete the first card and reboot the Windows virtual OS, the problem -happens! - - - - -We found that the second network card replaces the first one, and the -IP address of "network connection 2" becomes 192.168.1.1. - - -Our third-party users began to complain about this bug. All the traffic on the -second IP is lost!!! - -Both Windows and Linux have this bug; on Linux we solved it -by bonding the network card's PCI and MAC address. - -There is no good solution on Windows OS. Is there one? We implemented a plan to -restore the IP via QGA. Is there a better way? - diff --git a/classification_output/01/instruction/62179944 b/classification_output/01/instruction/62179944 new file mode 100644 index 000000000..5dad058d5 --- /dev/null +++ b/classification_output/01/instruction/62179944 @@ -0,0 +1,31 @@ +instruction: 0.693 +mistranslation: 0.533 +other: 0.519 +semantic: 0.454 + +[Qemu-devel] [BUG] network : windows os lost ip address of the network card in some cases + +We have seen this problem for a long time. For example, if we have three network +cards in the virtual XML file, such as "network connection 1" / "network connection +2" / "network connection 3". + +Each network card has its own IP address, such as 192.168.1.1 / 2.1 / 3.1. When we +delete the first card and reboot the Windows virtual OS, the problem +happens! + + + + +We found that the second network card replaces the first one, and the +IP address of "network connection 2" becomes 192.168.1.1. + + +Our third-party users began to complain about this bug. All the traffic on the +second IP is lost!!! + +Both Windows and Linux have this bug; on Linux we solved it +by bonding the network card's PCI and MAC address. + +There is no good solution on Windows OS. Is there one? We implemented a plan to +restore the IP via QGA. Is there a better way? + diff --git a/classification_output/01/instruction/63565653 b/classification_output/01/instruction/63565653 new file mode 100644 index 000000000..dfac92bf4 --- /dev/null +++ b/classification_output/01/instruction/63565653 @@ -0,0 +1,49 @@ +instruction: 0.905 +other: 0.898 +semantic: 0.825 +mistranslation: 0.462 + +[Qemu-devel] [BUG]pcibus_reset assertion failure on guest reboot + +Qemu-2.6.2 + +Start a vm with vhost-net, reboot and hot-unplug the virtio-net nic within a short +time, and we hit a +pcibus_reset assertion failure. + +Here is the qemu log: +22:29:46.359386+08:00 acpi_pm1_cnt_write -> guest does soft power off +22:29:46.785310+08:00 qemu_devices_reset +22:29:46.788093+08:00 virtio_pci_device_unplugged -> virtio net unplugged +22:29:46.803427+08:00 pcibus_reset: Assertion `bus->irq_count[i] == 0' failed.
+ +Here is stack info: +(gdb) bt +#0 0x00007f9a336795d7 in raise () from /usr/lib64/libc.so.6 +#1 0x00007f9a3367acc8 in abort () from /usr/lib64/libc.so.6 +#2 0x00007f9a33672546 in __assert_fail_base () from /usr/lib64/libc.so.6 +#3 0x00007f9a336725f2 in __assert_fail () from /usr/lib64/libc.so.6 +#4 0x0000000000641884 in pcibus_reset (qbus=0x29eee60) at hw/pci/pci.c:283 +#5 0x00000000005bfc30 in qbus_reset_one (bus=0x29eee60, opaque=) at hw/core/qdev.c:319 +#6 0x00000000005c1b19 in qdev_walk_children (dev=0x29ed2b0, pre_devfn=0x0, +pre_busfn=0x0, post_devfn=0x5c2440 ... +#7 0x00000000005c1c59 in qbus_walk_children (bus=0x2736f80, pre_devfn=0x0, +pre_busfn=0x0, post_devfn=0x5c2440 ... +#8 0x00000000005513f5 in qemu_devices_reset () at vl.c:1998 +#9 0x00000000004cab9d in pc_machine_reset () at +/home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/i386/pc.c:1976 +#10 0x000000000055148b in qemu_system_reset (address@hidden) at vl.c:2011 +#11 0x000000000055164f in main_loop_should_exit () at vl.c:2169 +#12 0x0000000000551719 in main_loop () at vl.c:2212 +#13 0x000000000041c9a8 in main (argc=, argv=, +envp=) at vl.c:5130 +(gdb) f 4 +... +(gdb) p bus->irq_count[0] +$6 = 1 + +Seems pci_update_irq_disabled doesn't work well + +can anyone help? + diff --git a/classification_output/01/instruction/70868267 b/classification_output/01/instruction/70868267 new file mode 100644 index 000000000..ffcf905b4 --- /dev/null +++ b/classification_output/01/instruction/70868267 @@ -0,0 +1,40 @@ +instruction: 0.778 +semantic: 0.635 +mistranslation: 0.537 +other: 0.236 + +[Qemu-devel] [BUG] Failed to compile using gcc7.1 + +Hi all, + +After upgrading gcc from 6.3.1 to 7.1.1, qemu can't be compiled with gcc. + +The error is: + +------ + CC block/blkdebug.o +block/blkdebug.c: In function 'blkdebug_refresh_filename': +block/blkdebug.c:693:31: error: '%s' directive output may be truncated +writing up to 4095 bytes into a region of size 4086 +[-Werror=format-truncation=] +"blkdebug:%s:%s", s->config_file ?: "", + ^~ +In file included from /usr/include/stdio.h:939:0, + from /home/adam/qemu/include/qemu/osdep.h:68, + from block/blkdebug.c:25: +/usr/include/bits/stdio2.h:64:10: note: '__builtin___snprintf_chk' +output 11 or more bytes (assuming 4106) into a destination of size 4096 +return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + __bos (__s), __fmt, __va_arg_pack ()); + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +cc1: all warnings being treated as errors +make: *** [/home/adam/qemu/rules.mak:69: block/blkdebug.o] Error 1 +------ + +It seems that gcc 7 is introducing more restrict check for printf. +If using clang, although there are some extra warning, it can at least +pass the compile. +Thanks, +Qu + diff --git a/classification_output/01/instruction/73660729 b/classification_output/01/instruction/73660729 new file mode 100644 index 000000000..92d85cc82 --- /dev/null +++ b/classification_output/01/instruction/73660729 @@ -0,0 +1,31 @@ +instruction: 0.753 +semantic: 0.698 +mistranslation: 0.633 +other: 0.620 + +[BUG]The latest qemu crashed when I tested cxl + +I test cxl with the patch:[v11,0/2] arm/virt: + CXL support via pxb_cxl. +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +But the qemu crashed,and showing an error: +qemu-system-aarch64: ../hw/arm/virt.c:1735: virt_get_high_memmap_enabled: + Assertion `ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == ARRAY_SIZE(enabled_array)' failed. 
+Then I modify the patch to fix the bug: +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ea2413a0ba..3d4cee3491 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1710,6 +1730,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState + *vms, +&vms->highmem_redists, +&vms->highmem_ecam, +&vms->highmem_mmio, ++ &vms->cxl_devices_state.is_enabled, +}; +Now qemu works good. +Could you tell me when the patch( +arm/virt: + CXL support via pxb_cxl +) will be merged into upstream? + diff --git a/classification_output/01/instruction/7647456 b/classification_output/01/instruction/7647456 deleted file mode 100644 index d887fe7b5..000000000 --- a/classification_output/01/instruction/7647456 +++ /dev/null @@ -1,110 +0,0 @@ -instruction: 0.768 -other: 0.737 -semantic: 0.669 -mistranslation: 0.652 - -[Qemu-devel] Can I have someone's feedback on [bug 1809075] Concurrency bug on keyboard events: capslock LED messing up keycode streams causes character misses at guest kernel - -Hi everyone. -Can I please have someone's feedback on this bug? -https://bugs.launchpad.net/qemu/+bug/1809075 -Briefly, guest OS loses characters sent to it via vnc. And I spot the -bug in relation to ps2 driver. -I'm thinking of possible fixes and I might want to use a memory barrier. -But I would really like to have some suggestion from a qemu developer -first. For example, can we brutally drop capslock LED key events in ps2 -queue? -It is actually relevant to openQA, an automated QA tool for openSUSE. -And this bug blocks a few test cases for us. -Thank you in advance! - -Kind regards, -Gao Zhiyuan - -Cc'ing Marc-André & Gerd. - -On 12/19/18 10:31 AM, Gao Zhiyuan wrote: -> -Hi everyone. -> -> -Can I please have someone's feedback on this bug? -> -https://bugs.launchpad.net/qemu/+bug/1809075 -> -Briefly, guest OS loses characters sent to it via vnc. And I spot the -> -bug in relation to ps2 driver. -> -> -I'm thinking of possible fixes and I might want to use a memory barrier. -> -But I would really like to have some suggestion from a qemu developer -> -first. For example, can we brutally drop capslock LED key events in ps2 -> -queue? -> -> -It is actually relevant to openQA, an automated QA tool for openSUSE. -> -And this bug blocks a few test cases for us. -> -> -Thank you in advance! -> -> -Kind regards, -> -Gao Zhiyuan -> - -On Thu, Jan 03, 2019 at 12:05:54PM +0100, Philippe Mathieu-Daudé wrote: -> -Cc'ing Marc-André & Gerd. -> -> -On 12/19/18 10:31 AM, Gao Zhiyuan wrote: -> -> Hi everyone. -> -> -> -> Can I please have someone's feedback on this bug? -> -> -https://bugs.launchpad.net/qemu/+bug/1809075 -> -> Briefly, guest OS loses characters sent to it via vnc. And I spot the -> -> bug in relation to ps2 driver. -> -> -> -> I'm thinking of possible fixes and I might want to use a memory barrier. -> -> But I would really like to have some suggestion from a qemu developer -> -> first. For example, can we brutally drop capslock LED key events in ps2 -> -> queue? -There is no "capslock LED key event". 0xfa is KBD_REPLY_ACK, and the -device queues it in response to guest port writes. Yes, the ack can -race with actual key events. But IMO that isn't a bug in qemu. - -Probably the linux kernel just throws away everything until it got the -ack for the port write, and that way the key event gets lost. On -physical hardware you will not notice because it is next to impossible -to type fast enough to hit the race window. - -So, go fix the kernel. 
- -Alternatively fix vncdotool to send uppercase letters properly with -shift key pressed. Then qemu wouldn't generate capslock key events -(that happens because qemu thinks guest and host capslock state is out -of sync) and the guests's capslock led update request wouldn't get into -the way. - -cheers, - Gerd - diff --git a/classification_output/01/instruction/7658242 b/classification_output/01/instruction/7658242 deleted file mode 100644 index 3ff255be0..000000000 --- a/classification_output/01/instruction/7658242 +++ /dev/null @@ -1,1125 +0,0 @@ -instruction: 0.775 -other: 0.771 -mistranslation: 0.719 -semantic: 0.673 - -[BUG] hw/i386/pc.c: CXL Fixed Memory Window should not reserve e820 in bios - -Early-boot e820 records will be inserted by the bios/efi/early boot -software and be reported to the kernel via insert_resource. Later, when -CXL drivers iterate through the regions again, they will insert another -resource and make the RESERVED memory area a child. - -This RESERVED memory area causes the memory region to become unusable, -and as a result attempting to create memory regions with - - `cxl create-region ...` - -Will fail due to the RESERVED area intersecting with the CXL window. - - -During boot the following traceback is observed: - -0xffffffff81101650 in insert_resource_expand_to_fit () -0xffffffff83d964c5 in e820__reserve_resources_late () -0xffffffff83e03210 in pcibios_resource_survey () -0xffffffff83e04f4a in pcibios_init () - -Which produces a call to reserve the CFMWS area: - -(gdb) p *new -$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", - flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, - child = 0x0} - -Later the Kernel parses ACPI tables and reserves the exact same area as -the CXL Fixed Memory Window. The use of `insert_resource_conflict` -retains the RESERVED region and makes it a child of the new region. - -0xffffffff811016a4 in insert_resource_conflict () - insert_resource () -0xffffffff81a81389 in cxl_parse_cfmws () -0xffffffff818c4a81 in call_handler () - acpi_parse_entries_array () - -(gdb) p/x *new -$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", - flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, - child = 0x0} - -This produces the following output in /proc/iomem: - -590000000-68fffffff : CXL Window 0 - 590000000-68fffffff : Reserved - -This reserved area causes `get_free_mem_region()` to fail due to a check -against `__region_intersects()`. Due to this reserved area, the -intersect check will only ever return REGION_INTERSECTS, which causes -`cxl create-region` to always fail. 
- -Signed-off-by: Gregory Price ---- - hw/i386/pc.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 566accf7e6..5bf5465a21 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, - hwaddr cxl_size = MiB; - - cxl_base = pc_get_cxl_range_start(pcms); -- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); - memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); - memory_region_add_subregion(system_memory, cxl_base, mr); - cxl_resv_end = cxl_base + cxl_size; -@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, - memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, - "cxl-fixed-memory-region", fw->size); - memory_region_add_subregion(system_memory, fw->base, &fw->mr); -- e820_add_entry(fw->base, fw->size, E820_RESERVED); - cxl_fmw_base += fw->size; - cxl_resv_end = cxl_fmw_base; - } --- -2.37.3 - -Early-boot e820 records will be inserted by the bios/efi/early boot -software and be reported to the kernel via insert_resource. Later, when -CXL drivers iterate through the regions again, they will insert another -resource and make the RESERVED memory area a child. - -This RESERVED memory area causes the memory region to become unusable, -and as a result attempting to create memory regions with - - `cxl create-region ...` - -Will fail due to the RESERVED area intersecting with the CXL window. - - -During boot the following traceback is observed: - -0xffffffff81101650 in insert_resource_expand_to_fit () -0xffffffff83d964c5 in e820__reserve_resources_late () -0xffffffff83e03210 in pcibios_resource_survey () -0xffffffff83e04f4a in pcibios_init () - -Which produces a call to reserve the CFMWS area: - -(gdb) p *new -$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", - flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, - child = 0x0} - -Later the Kernel parses ACPI tables and reserves the exact same area as -the CXL Fixed Memory Window. The use of `insert_resource_conflict` -retains the RESERVED region and makes it a child of the new region. - -0xffffffff811016a4 in insert_resource_conflict () - insert_resource () -0xffffffff81a81389 in cxl_parse_cfmws () -0xffffffff818c4a81 in call_handler () - acpi_parse_entries_array () - -(gdb) p/x *new -$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", - flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, - child = 0x0} - -This produces the following output in /proc/iomem: - -590000000-68fffffff : CXL Window 0 - 590000000-68fffffff : Reserved - -This reserved area causes `get_free_mem_region()` to fail due to a check -against `__region_intersects()`. Due to this reserved area, the -intersect check will only ever return REGION_INTERSECTS, which causes -`cxl create-region` to always fail. 
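To make the failure mode concrete, here is a small self-contained toy model (ordinary userspace C, not kernel code) of the situation described above: a "Reserved" child spanning the whole CXL window means every candidate range inside the window intersects it, so a free region can never be found. The window and child addresses are taken from the report; everything else is purely illustrative.

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct res { uint64_t start, end; const char *name; };

    /* true if [s, e] overlaps the resource */
    static bool overlaps(uint64_t s, uint64_t e, const struct res *r)
    {
        return s <= r->end && e >= r->start;
    }

    int main(void)
    {
        struct res window   = { 0x290000000ULL, 0x2cfffffffULL, "CXL Window 0" };
        struct res reserved = { 0x290000000ULL, 0x2cfffffffULL, "Reserved" };
        uint64_t want = 0x10000000ULL;   /* look for a free 256 MiB range */

        for (uint64_t s = window.start; s + want - 1 <= window.end; s += want) {
            if (!overlaps(s, s + want - 1, &reserved)) {
                printf("free range found at 0x%" PRIx64 "\n", s);
                return 0;
            }
            /* every candidate hits the Reserved child, analogous to
             * __region_intersects() always reporting REGION_INTERSECTS */
        }
        printf("no free range: every candidate overlaps the Reserved child\n");
        return 1;
    }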
- -Signed-off-by: Gregory Price ---- - hw/i386/pc.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 566accf7e6..5bf5465a21 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, - hwaddr cxl_size = MiB; -cxl_base = pc_get_cxl_range_start(pcms); -- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); - memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); - memory_region_add_subregion(system_memory, cxl_base, mr); - cxl_resv_end = cxl_base + cxl_size; -@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, - memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, -fw, - "cxl-fixed-memory-region", fw->size); - memory_region_add_subregion(system_memory, fw->base, &fw->mr); -Or will this be subregion of cxl_base? - -Thanks, -Pankaj -- e820_add_entry(fw->base, fw->size, E820_RESERVED); - cxl_fmw_base += fw->size; - cxl_resv_end = cxl_fmw_base; - } - -> -> - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); -> -> memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); -> -> memory_region_add_subregion(system_memory, cxl_base, mr); -> -> cxl_resv_end = cxl_base + cxl_size; -> -> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, -> -> memory_region_init_io(&fw->mr, OBJECT(machine), -> -> &cfmws_ops, fw, -> -> "cxl-fixed-memory-region", -> -> fw->size); -> -> memory_region_add_subregion(system_memory, fw->base, -> -> &fw->mr); -> -> -Or will this be subregion of cxl_base? -> -> -Thanks, -> -Pankaj -The memory region backing this memory area still has to be initialized -and added in the QEMU system, but it will now be initialized for use by -linux after PCI/ACPI setup occurs and the CXL driver discovers it via -CDAT. - -It's also still possible to assign this area a static memory region at -bool by setting up the SRATs in the ACPI tables, but that patch is not -upstream yet. - -On Tue, Oct 18, 2022 at 5:14 AM Gregory Price wrote: -> -> -Early-boot e820 records will be inserted by the bios/efi/early boot -> -software and be reported to the kernel via insert_resource. Later, when -> -CXL drivers iterate through the regions again, they will insert another -> -resource and make the RESERVED memory area a child. -I have already sent a patch -https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html -. -When the patch is applied, there would not be any reserved entries -even with passing E820_RESERVED . -So this patch needs to be evaluated in the light of the above patch I -sent. Once you apply my patch, does the issue still exist? - -> -> -This RESERVED memory area causes the memory region to become unusable, -> -and as a result attempting to create memory regions with -> -> -`cxl create-region ...` -> -> -Will fail due to the RESERVED area intersecting with the CXL window. -> -> -> -During boot the following traceback is observed: -> -> -0xffffffff81101650 in insert_resource_expand_to_fit () -> -0xffffffff83d964c5 in e820__reserve_resources_late () -> -0xffffffff83e03210 in pcibios_resource_survey () -> -0xffffffff83e04f4a in pcibios_init () -> -> -Which produces a call to reserve the CFMWS area: -> -> -(gdb) p *new -> -$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", -> -flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, -> -child = 0x0} -> -> -Later the Kernel parses ACPI tables and reserves the exact same area as -> -the CXL Fixed Memory Window. 
The use of `insert_resource_conflict` -> -retains the RESERVED region and makes it a child of the new region. -> -> -0xffffffff811016a4 in insert_resource_conflict () -> -insert_resource () -> -0xffffffff81a81389 in cxl_parse_cfmws () -> -0xffffffff818c4a81 in call_handler () -> -acpi_parse_entries_array () -> -> -(gdb) p/x *new -> -$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", -> -flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, -> -child = 0x0} -> -> -This produces the following output in /proc/iomem: -> -> -590000000-68fffffff : CXL Window 0 -> -590000000-68fffffff : Reserved -> -> -This reserved area causes `get_free_mem_region()` to fail due to a check -> -against `__region_intersects()`. Due to this reserved area, the -> -intersect check will only ever return REGION_INTERSECTS, which causes -> -`cxl create-region` to always fail. -> -> -Signed-off-by: Gregory Price -> ---- -> -hw/i386/pc.c | 2 -- -> -1 file changed, 2 deletions(-) -> -> -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -> -index 566accf7e6..5bf5465a21 100644 -> ---- a/hw/i386/pc.c -> -+++ b/hw/i386/pc.c -> -@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, -> -hwaddr cxl_size = MiB; -> -> -cxl_base = pc_get_cxl_range_start(pcms); -> -- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); -> -memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); -> -memory_region_add_subregion(system_memory, cxl_base, mr); -> -cxl_resv_end = cxl_base + cxl_size; -> -@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, -> -memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, -> -fw, -> -"cxl-fixed-memory-region", fw->size); -> -memory_region_add_subregion(system_memory, fw->base, -> -&fw->mr); -> -- e820_add_entry(fw->base, fw->size, E820_RESERVED); -> -cxl_fmw_base += fw->size; -> -cxl_resv_end = cxl_fmw_base; -> -} -> --- -> -2.37.3 -> - -This patch does not resolve the issue, reserved entries are still created. -[    0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved -[    0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved -# cat /proc/iomem -290000000-29fffffff : CXL Window 0 -  290000000-29fffffff : Reserved -# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 -cxl region: create_region: region0: set_size failed: Numerical result out of range -cxl region: cmd_create_region: created 0 regions -On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha < -ani@anisinha.ca -> wrote: -On Tue, Oct 18, 2022 at 5:14 AM Gregory Price < -gourry.memverge@gmail.com -> wrote: -> -> Early-boot e820 records will be inserted by the bios/efi/early boot -> software and be reported to the kernel via insert_resource.  Later, when -> CXL drivers iterate through the regions again, they will insert another -> resource and make the RESERVED memory area a child. -I have already sent a patch -https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html -. -When the patch is applied, there would not be any reserved entries -even with passing E820_RESERVED . -So this patch needs to be evaluated in the light of the above patch I -sent. Once you apply my patch, does the issue still exist? -> -> This RESERVED memory area causes the memory region to become unusable, -> and as a result attempting to create memory regions with -> ->     `cxl create-region ...` -> -> Will fail due to the RESERVED area intersecting with the CXL window. 
-> -> -> During boot the following traceback is observed: -> -> 0xffffffff81101650 in insert_resource_expand_to_fit () -> 0xffffffff83d964c5 in e820__reserve_resources_late () -> 0xffffffff83e03210 in pcibios_resource_survey () -> 0xffffffff83e04f4a in pcibios_init () -> -> Which produces a call to reserve the CFMWS area: -> -> (gdb) p *new -> $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", ->        flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, ->        child = 0x0} -> -> Later the Kernel parses ACPI tables and reserves the exact same area as -> the CXL Fixed Memory Window.  The use of `insert_resource_conflict` -> retains the RESERVED region and makes it a child of the new region. -> -> 0xffffffff811016a4 in insert_resource_conflict () ->                       insert_resource () -> 0xffffffff81a81389 in cxl_parse_cfmws () -> 0xffffffff818c4a81 in call_handler () ->                       acpi_parse_entries_array () -> -> (gdb) p/x *new -> $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", ->        flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, ->        child = 0x0} -> -> This produces the following output in /proc/iomem: -> -> 590000000-68fffffff : CXL Window 0 ->   590000000-68fffffff : Reserved -> -> This reserved area causes `get_free_mem_region()` to fail due to a check -> against `__region_intersects()`.  Due to this reserved area, the -> intersect check will only ever return REGION_INTERSECTS, which causes -> `cxl create-region` to always fail. -> -> Signed-off-by: Gregory Price < -gregory.price@memverge.com -> -> --- ->  hw/i386/pc.c | 2 -- ->  1 file changed, 2 deletions(-) -> -> diff --git a/hw/i386/pc.c b/hw/i386/pc.c -> index 566accf7e6..5bf5465a21 100644 -> --- a/hw/i386/pc.c -> +++ b/hw/i386/pc.c -> @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, ->          hwaddr cxl_size = MiB; -> ->          cxl_base = pc_get_cxl_range_start(pcms); -> -        e820_add_entry(cxl_base, cxl_size, E820_RESERVED); ->          memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); ->          memory_region_add_subregion(system_memory, cxl_base, mr); ->          cxl_resv_end = cxl_base + cxl_size; -> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, ->                  memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, ->                                        "cxl-fixed-memory-region", fw->size); ->                  memory_region_add_subregion(system_memory, fw->base, &fw->mr); -> -                e820_add_entry(fw->base, fw->size, E820_RESERVED); ->                  cxl_fmw_base += fw->size; ->                  cxl_resv_end = cxl_fmw_base; ->              } -> -- -> 2.37.3 -> - -+Gerd Hoffmann - -On Tue, Oct 18, 2022 at 8:16 PM Gregory Price wrote: -> -> -This patch does not resolve the issue, reserved entries are still created. 
-> -> -[ 0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved -> -[ 0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved -> -> -# cat /proc/iomem -> -290000000-29fffffff : CXL Window 0 -> -290000000-29fffffff : Reserved -> -> -# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 -> -cxl region: create_region: region0: set_size failed: Numerical result out of -> -range -> -cxl region: cmd_create_region: created 0 regions -> -> -On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha wrote: -> -> -> -> On Tue, Oct 18, 2022 at 5:14 AM Gregory Price -> -> wrote: -> -> > -> -> > Early-boot e820 records will be inserted by the bios/efi/early boot -> -> > software and be reported to the kernel via insert_resource. Later, when -> -> > CXL drivers iterate through the regions again, they will insert another -> -> > resource and make the RESERVED memory area a child. -> -> -> -> I have already sent a patch -> -> -https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html -. -> -> When the patch is applied, there would not be any reserved entries -> -> even with passing E820_RESERVED . -> -> So this patch needs to be evaluated in the light of the above patch I -> -> sent. Once you apply my patch, does the issue still exist? -> -> -> -> > -> -> > This RESERVED memory area causes the memory region to become unusable, -> -> > and as a result attempting to create memory regions with -> -> > -> -> > `cxl create-region ...` -> -> > -> -> > Will fail due to the RESERVED area intersecting with the CXL window. -> -> > -> -> > -> -> > During boot the following traceback is observed: -> -> > -> -> > 0xffffffff81101650 in insert_resource_expand_to_fit () -> -> > 0xffffffff83d964c5 in e820__reserve_resources_late () -> -> > 0xffffffff83e03210 in pcibios_resource_survey () -> -> > 0xffffffff83e04f4a in pcibios_init () -> -> > -> -> > Which produces a call to reserve the CFMWS area: -> -> > -> -> > (gdb) p *new -> -> > $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", -> -> > flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, -> -> > child = 0x0} -> -> > -> -> > Later the Kernel parses ACPI tables and reserves the exact same area as -> -> > the CXL Fixed Memory Window. The use of `insert_resource_conflict` -> -> > retains the RESERVED region and makes it a child of the new region. -> -> > -> -> > 0xffffffff811016a4 in insert_resource_conflict () -> -> > insert_resource () -> -> > 0xffffffff81a81389 in cxl_parse_cfmws () -> -> > 0xffffffff818c4a81 in call_handler () -> -> > acpi_parse_entries_array () -> -> > -> -> > (gdb) p/x *new -> -> > $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", -> -> > flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, -> -> > child = 0x0} -> -> > -> -> > This produces the following output in /proc/iomem: -> -> > -> -> > 590000000-68fffffff : CXL Window 0 -> -> > 590000000-68fffffff : Reserved -> -> > -> -> > This reserved area causes `get_free_mem_region()` to fail due to a check -> -> > against `__region_intersects()`. Due to this reserved area, the -> -> > intersect check will only ever return REGION_INTERSECTS, which causes -> -> > `cxl create-region` to always fail. 
-> -> > -> -> > Signed-off-by: Gregory Price -> -> > --- -> -> > hw/i386/pc.c | 2 -- -> -> > 1 file changed, 2 deletions(-) -> -> > -> -> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c -> -> > index 566accf7e6..5bf5465a21 100644 -> -> > --- a/hw/i386/pc.c -> -> > +++ b/hw/i386/pc.c -> -> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, -> -> > hwaddr cxl_size = MiB; -> -> > -> -> > cxl_base = pc_get_cxl_range_start(pcms); -> -> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); -> -> > memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); -> -> > memory_region_add_subregion(system_memory, cxl_base, mr); -> -> > cxl_resv_end = cxl_base + cxl_size; -> -> > @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, -> -> > memory_region_init_io(&fw->mr, OBJECT(machine), -> -> > &cfmws_ops, fw, -> -> > "cxl-fixed-memory-region", -> -> > fw->size); -> -> > memory_region_add_subregion(system_memory, fw->base, -> -> > &fw->mr); -> -> > - e820_add_entry(fw->base, fw->size, E820_RESERVED); -> -> > cxl_fmw_base += fw->size; -> -> > cxl_resv_end = cxl_fmw_base; -> -> > } -> -> > -- -> -> > 2.37.3 -> -> > - -> ->> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c -> ->> > index 566accf7e6..5bf5465a21 100644 -> ->> > --- a/hw/i386/pc.c -> ->> > +++ b/hw/i386/pc.c -> ->> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, -> ->> > hwaddr cxl_size = MiB; -> ->> > -> ->> > cxl_base = pc_get_cxl_range_start(pcms); -> ->> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); -Just dropping it doesn't look like a good plan to me. - -You can try set etc/reserved-memory-end fw_cfg file instead. Firmware -(both seabios and ovmf) read it and will make sure the 64bit pci mmio -window is placed above that address, i.e. this effectively reserves -address space. Right now used by memory hotplug code, but should work -for cxl too I think (disclaimer: don't know much about cxl ...). - -take care & HTH, - Gerd - -On Tue, 8 Nov 2022 12:21:11 +0100 -Gerd Hoffmann wrote: - -> -> >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c -> -> >> > index 566accf7e6..5bf5465a21 100644 -> -> >> > --- a/hw/i386/pc.c -> -> >> > +++ b/hw/i386/pc.c -> -> >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, -> -> >> > hwaddr cxl_size = MiB; -> -> >> > -> -> >> > cxl_base = pc_get_cxl_range_start(pcms); -> -> >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); -> -> -Just dropping it doesn't look like a good plan to me. -> -> -You can try set etc/reserved-memory-end fw_cfg file instead. Firmware -> -(both seabios and ovmf) read it and will make sure the 64bit pci mmio -> -window is placed above that address, i.e. this effectively reserves -> -address space. Right now used by memory hotplug code, but should work -> -for cxl too I think (disclaimer: don't know much about cxl ...). -As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end -at all, it' has its own mapping. - -Regardless of that, reserved E820 entries look wrong, and looking at -commit message OS is right to bailout on them (expected according -to ACPI spec). -Also spec says - -" -E820 Assumptions and Limitations - [...] - The platform boot firmware does not return a range description for the memory -mapping of - PCI devices, ISA Option ROMs, and ISA Plug and Play cards because the OS has -mechanisms - available to detect them. -" - -so dropping reserved entries looks reasonable from ACPI spec point of view. -(disclaimer: don't know much about cxl ... 
either) -> -> -take care & HTH, -> -Gerd -> - -On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: -> -On Tue, 8 Nov 2022 12:21:11 +0100 -> -Gerd Hoffmann wrote: -> -> -> > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c -> -> > >> > index 566accf7e6..5bf5465a21 100644 -> -> > >> > --- a/hw/i386/pc.c -> -> > >> > +++ b/hw/i386/pc.c -> -> > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, -> -> > >> > hwaddr cxl_size = MiB; -> -> > >> > -> -> > >> > cxl_base = pc_get_cxl_range_start(pcms); -> -> > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); -> -> -> -> Just dropping it doesn't look like a good plan to me. -> -> -> -> You can try set etc/reserved-memory-end fw_cfg file instead. Firmware -> -> (both seabios and ovmf) read it and will make sure the 64bit pci mmio -> -> window is placed above that address, i.e. this effectively reserves -> -> address space. Right now used by memory hotplug code, but should work -> -> for cxl too I think (disclaimer: don't know much about cxl ...). -> -> -As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end -> -at all, it' has its own mapping. -This should be changed. cxl should make sure the highest address used -is stored in etc/reserved-memory-end to avoid the firmware mapping pci -resources there. - -> -so dropping reserved entries looks reasonable from ACPI spec point of view. -Yep, I don't want dispute that. - -I suspect the reason for these entries to exist in the first place is to -inform the firmware that it should not place stuff there, and if we -remove that to conform with the spec we need some alternative way for -that ... - -take care, - Gerd - -On Fri, 11 Nov 2022 12:40:59 +0100 -Gerd Hoffmann wrote: - -> -On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: -> -> On Tue, 8 Nov 2022 12:21:11 +0100 -> -> Gerd Hoffmann wrote: -> -> -> -> > > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c -> -> > > >> > index 566accf7e6..5bf5465a21 100644 -> -> > > >> > --- a/hw/i386/pc.c -> -> > > >> > +++ b/hw/i386/pc.c -> -> > > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, -> -> > > >> > hwaddr cxl_size = MiB; -> -> > > >> > -> -> > > >> > cxl_base = pc_get_cxl_range_start(pcms); -> -> > > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); -> -> > -> -> > Just dropping it doesn't look like a good plan to me. -> -> > -> -> > You can try set etc/reserved-memory-end fw_cfg file instead. Firmware -> -> > (both seabios and ovmf) read it and will make sure the 64bit pci mmio -> -> > window is placed above that address, i.e. this effectively reserves -> -> > address space. Right now used by memory hotplug code, but should work -> -> > for cxl too I think (disclaimer: don't know much about cxl ...). -> -> -> -> As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end -> -> at all, it' has its own mapping. -> -> -This should be changed. cxl should make sure the highest address used -> -is stored in etc/reserved-memory-end to avoid the firmware mapping pci -> -resources there. -if (pcmc->has_reserved_memory && machine->device_memory->base) { - -[...] 
- - if (pcms->cxl_devices_state.is_enabled) { - - res_mem_end = cxl_resv_end; - -that should be handled by this line - - } - - *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); - - fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); - - } - -so SeaBIOS shouldn't intrude into CXL address space -(I assume EDK2 behave similarly here) - -> -> so dropping reserved entries looks reasonable from ACPI spec point of view. -> -> -> -> -Yep, I don't want dispute that. -> -> -I suspect the reason for these entries to exist in the first place is to -> -inform the firmware that it should not place stuff there, and if we -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -just to educate me, can you point out what SeaBIOS code does with reservations. - -> -remove that to conform with the spec we need some alternative way for -> -that ... -with etc/reserved-memory-end set as above, -is E820_RESERVED really needed here? - -(my understanding was that E820_RESERVED weren't accounted for when -initializing PCI devices) - -> -> -take care, -> -Gerd -> - -> -if (pcmc->has_reserved_memory && machine->device_memory->base) { -> -> -[...] -> -> -if (pcms->cxl_devices_state.is_enabled) { -> -> -res_mem_end = cxl_resv_end; -> -> -that should be handled by this line -> -> -} -> -> -*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); -> -> -fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, -> -sizeof(*val)); -> -} -> -> -so SeaBIOS shouldn't intrude into CXL address space -Yes, looks good, so with this in place already everyting should be fine. - -> -(I assume EDK2 behave similarly here) -Correct, ovmf reads that fw_cfg file too. - -> -> I suspect the reason for these entries to exist in the first place is to -> -> inform the firmware that it should not place stuff there, and if we -> -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -> -just to educate me, can you point out what SeaBIOS code does with -> -reservations. -They are added to the e820 map which gets passed on to the OS. seabios -uses (and updateas) the e820 map too, when allocating memory for -example. While thinking about it I'm not fully sure it actually looks -at reservations, maybe it only uses (and updates) ram entries when -allocating memory. - -> -> remove that to conform with the spec we need some alternative way for -> -> that ... -> -> -with etc/reserved-memory-end set as above, -> -is E820_RESERVED really needed here? -No. Setting etc/reserved-memory-end is enough. - -So for the original patch: -Acked-by: Gerd Hoffmann - -take care, - Gerd - -On Fri, Nov 11, 2022 at 02:36:02PM +0100, Gerd Hoffmann wrote: -> -> if (pcmc->has_reserved_memory && machine->device_memory->base) { -> -> -> -> [...] -> -> -> -> if (pcms->cxl_devices_state.is_enabled) { -> -> -> -> res_mem_end = cxl_resv_end; -> -> -> -> that should be handled by this line -> -> -> -> } -> -> -> -> *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); -> -> -> -> fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, -> -> sizeof(*val)); -> -> } -> -> -> -> so SeaBIOS shouldn't intrude into CXL address space -> -> -Yes, looks good, so with this in place already everyting should be fine. -> -> -> (I assume EDK2 behave similarly here) -> -> -Correct, ovmf reads that fw_cfg file too. 
-> -> -> > I suspect the reason for these entries to exist in the first place is to -> -> > inform the firmware that it should not place stuff there, and if we -> -> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -> -> just to educate me, can you point out what SeaBIOS code does with -> -> reservations? -> -> -They are added to the e820 map which gets passed on to the OS. seabios -> -uses (and updates) the e820 map too, when allocating memory for -> -example. While thinking about it I'm not fully sure it actually looks -> -at reservations, maybe it only uses (and updates) ram entries when -> -allocating memory. -> -> -> > remove that to conform with the spec we need some alternative way for -> -> > that ... -> -> -> -> with etc/reserved-memory-end set as above, -> -> is E820_RESERVED really needed here? -> -> -No. Setting etc/reserved-memory-end is enough. -> -> -So for the original patch: -> -Acked-by: Gerd Hoffmann -> -> -take care, -> -Gerd -It's upstream already, sorry I can't add your tag. - --- -MST - diff --git a/classification_output/01/instruction/7733130 b/classification_output/01/instruction/7733130 deleted file mode 100644 index 1c3bc483f..000000000 --- a/classification_output/01/instruction/7733130 +++ /dev/null @@ -1,47 +0,0 @@ -instruction: 0.758 -semantic: 0.694 -other: 0.687 -mistranslation: 0.516 - -[Qemu-devel] [BUG] VNC: client won't send FramebufferUpdateRequest if job in flight is aborted - -Hi Gerd, Daniel. - -We noticed that if VncSharePolicy was configured in -VNC_SHARE_POLICY_FORCE_SHARED mode and -multiple vnc clients opened vnc connections, some clients would end up with a blank screen -with high probability. -This problem can be reproduced when we regularly reboot suse12sp3 in graphic -mode, both -with the RealVNC and the noVNC client. - -We dug into it and found out that those clients show a blank screen because they -don't -send a FramebufferUpdateRequest any more. One step further, we noticed that each -time -the job in flight is aborted, one client's screen goes blank. - -The bug is triggered in the following procedure. -Guest reboot => graphic mode switch => graphic_hw_update => vga_update_display -=> vga_draw_graphic (full_update = 1) => dpy_gfx_replace_surface => -vnc_dpy_switch => -vnc_abort_display_jobs (client may have a job in flight) => job removed from the -queue -If one client has a vnc job in flight, *vnc_abort_display_jobs* will wait until -its job is abandoned. -This happens in vnc_worker_thread_loop when the 'if (job->vs->ioc == NULL -|| job->vs->abort == true)' -branch is taken. - -As we can see, *vnc_abort_display_jobs* is intended as an optimization to -avoid unnecessary client updates. -But if a client sends a FramebufferUpdateRequest for some graphic area and its -FramebufferUpdate response job -is abandoned, the client may wait for the response and never send a new -FramebufferUpdateRequest, which may -cause the client to show a blank screen forever. - -So I am wondering whether we should drop the *vnc_abort_display_jobs* -optimization or do some trick here -to push the client to send a new FramebufferUpdateRequest. Do you have any idea?
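One direction that could be explored, sketched below purely as an illustration: instead of silently dropping a client's in-flight job, remember that it was thrown away and proactively push a full update to that client afterwards. All names except vnc_abort_display_jobs() (mentioned in the report above) are invented for this sketch and are not the actual QEMU VNC internals.

    /* Illustrative sketch only; field and helper names are assumptions. */
    static void vnc_abort_display_jobs_and_refresh(VncDisplay *vd)
    {
        VncState *vs;

        /* remember which clients are about to lose an in-flight response */
        QTAILQ_FOREACH(vs, &vd->clients, next) {
            if (vnc_has_job_in_flight(vs)) {        /* hypothetical helper */
                vs->needs_forced_update = true;     /* hypothetical flag */
            }
        }

        vnc_abort_display_jobs(vd);                 /* existing behaviour */

        /* push a full FramebufferUpdate to those clients instead of waiting
         * for a FramebufferUpdateRequest that will never arrive */
        QTAILQ_FOREACH(vs, &vd->clients, next) {
            if (vs->needs_forced_update) {
                vnc_schedule_full_update(vs);       /* hypothetical helper */
                vs->needs_forced_update = false;
            }
        }
    }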
- diff --git a/classification_output/01/instruction/7960594 b/classification_output/01/instruction/7960594 deleted file mode 100644 index c06d35dd8..000000000 --- a/classification_output/01/instruction/7960594 +++ /dev/null @@ -1,158 +0,0 @@ -instruction: 0.991 -other: 0.979 -semantic: 0.974 -mistranslation: 0.930 - -[Qemu-devel] [Bug Report] vm paused after succeeding to migrate - -Hi, all -I encounterd a bug when I try to migrate a windows vm. - -Enviroment information: -host A: cpu E5620(model WestmereEP without flag xsave) -host B: cpu E5-2643(model SandyBridgeEP with xsave) - -The reproduce steps is : -1. Start a windows 2008 vm with -cpu host(which means host-passthrough). -2. Migrate the vm to host B when cr4.OSXSAVE=0 (successfully). -3. Vm runs on host B for a while so that cr4.OSXSAVE changes to 1. -4. Then migrate the vm to host A (successfully), but vm was paused, and qemu -printed log as followed: - -KVM: entry failed, hardware error 0x80000021 - -If you're running a guest on an Intel machine without unrestricted mode -support, the failure can be most likely due to the guest entering an invalid -state for Intel VT. For example, the guest maybe running in big real mode -which is not supported on less recent Intel processors. - -EAX=019b3bb0 EBX=01a3ae80 ECX=01a61ce8 EDX=00000000 -ESI=01a62000 EDI=00000000 EBP=00000000 ESP=01718b20 -EIP=0185d982 EFL=00000286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 -ES =0000 00000000 0000ffff 00009300 -CS =f000 ffff0000 0000ffff 00009b00 -SS =0000 00000000 0000ffff 00009300 -DS =0000 00000000 0000ffff 00009300 -FS =0000 00000000 0000ffff 00009300 -GS =0000 00000000 0000ffff 00009300 -LDT=0000 00000000 0000ffff 00008200 -TR =0000 00000000 0000ffff 00008b00 -GDT= 00000000 0000ffff -IDT= 00000000 0000ffff -CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 -DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 -DR3=0000000000000000 -DR6=00000000ffff0ff0 DR7=0000000000000400 -EFER=0000000000000000 -Code=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <00> 00 00 00 -00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - -I have found that problem happened when kvm_put_sregs returns err -22(called by -kvm_arch_put_registers(qemu)). -Because kvm_arch_vcpu_ioctl_set_sregs(kvm-mod) checked that guest_cpuid_has no -X86_FEATURE_XSAVE but cr4.OSXSAVE=1. -So should we cancel migration when kvm_arch_put_registers returns error? - -* linzhecheng (address@hidden) wrote: -> -Hi, all -> -I encounterd a bug when I try to migrate a windows vm. -> -> -Enviroment information: -> -host A: cpu E5620(model WestmereEP without flag xsave) -> -host B: cpu E5-2643(model SandyBridgeEP with xsave) -> -> -The reproduce steps is : -> -1. Start a windows 2008 vm with -cpu host(which means host-passthrough). -> -2. Migrate the vm to host B when cr4.OSXSAVE=0 (successfully). -> -3. Vm runs on host B for a while so that cr4.OSXSAVE changes to 1. -> -4. Then migrate the vm to host A (successfully), but vm was paused, and qemu -> -printed log as followed: -Remember that migrating using -cpu host across different CPU models is NOT -expected to work. - -> -KVM: entry failed, hardware error 0x80000021 -> -> -If you're running a guest on an Intel machine without unrestricted mode -> -support, the failure can be most likely due to the guest entering an invalid -> -state for Intel VT. For example, the guest maybe running in big real mode -> -which is not supported on less recent Intel processors. 
-> -> -EAX=019b3bb0 EBX=01a3ae80 ECX=01a61ce8 EDX=00000000 -> -ESI=01a62000 EDI=00000000 EBP=00000000 ESP=01718b20 -> -EIP=0185d982 EFL=00000286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 -> -ES =0000 00000000 0000ffff 00009300 -> -CS =f000 ffff0000 0000ffff 00009b00 -> -SS =0000 00000000 0000ffff 00009300 -> -DS =0000 00000000 0000ffff 00009300 -> -FS =0000 00000000 0000ffff 00009300 -> -GS =0000 00000000 0000ffff 00009300 -> -LDT=0000 00000000 0000ffff 00008200 -> -TR =0000 00000000 0000ffff 00008b00 -> -GDT= 00000000 0000ffff -> -IDT= 00000000 0000ffff -> -CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 -> -DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 -> -DR3=0000000000000000 -> -DR6=00000000ffff0ff0 DR7=0000000000000400 -> -EFER=0000000000000000 -> -Code=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <00> 00 00 -> -00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -> -00 -> -> -I have found that problem happened when kvm_put_sregs returns err -22(called -> -by kvm_arch_put_registers(qemu)). -> -Because kvm_arch_vcpu_ioctl_set_sregs(kvm-mod) checked that guest_cpuid_has -> -no X86_FEATURE_XSAVE but cr4.OSXSAVE=1. -> -So should we cancel migration when kvm_arch_put_registers returns error? -It would seem good if we can make the migration fail there rather than -hitting that KVM error. -It looks like we need to do a bit of plumbing to convert the places that -call it to return a bool rather than void. - -Dave - --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - diff --git a/classification_output/01/instruction/8019995 b/classification_output/01/instruction/8019995 deleted file mode 100644 index 92d85cc82..000000000 --- a/classification_output/01/instruction/8019995 +++ /dev/null @@ -1,31 +0,0 @@ -instruction: 0.753 -semantic: 0.698 -mistranslation: 0.633 -other: 0.620 - -[BUG]The latest qemu crashed when I tested cxl - -I test cxl with the patch:[v11,0/2] arm/virt: - CXL support via pxb_cxl. -https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ -But the qemu crashed,and showing an error: -qemu-system-aarch64: ../hw/arm/virt.c:1735: virt_get_high_memmap_enabled: - Assertion `ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == ARRAY_SIZE(enabled_array)' failed. -Then I modify the patch to fix the bug: -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ea2413a0ba..3d4cee3491 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1710,6 +1730,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState - *vms, -&vms->highmem_redists, -&vms->highmem_ecam, -&vms->highmem_mmio, -+ &vms->cxl_devices_state.is_enabled, -}; -Now qemu works good. -Could you tell me when the patch( -arm/virt: - CXL support via pxb_cxl -) will be merged into upstream? - diff --git a/classification_output/01/instruction/8566429 b/classification_output/01/instruction/8566429 deleted file mode 100644 index dfac92bf4..000000000 --- a/classification_output/01/instruction/8566429 +++ /dev/null @@ -1,49 +0,0 @@ -instruction: 0.905 -other: 0.898 -semantic: 0.825 -mistranslation: 0.462 - -[Qemu-devel] [BUG]pcibus_reset assertion failure on guest reboot - -Qemu-2.6.2 - -Start a vm with vhost-net , do reboot and hot-unplug viritio-net nic in short -time, we touch -pcibus_reset assertion failure. 
- -Here is qemu log: -22:29:46.359386+08:00 acpi_pm1_cnt_write -> guest do soft power off -22:29:46.785310+08:00 qemu_devices_reset -22:29:46.788093+08:00 virtio_pci_device_unplugged -> virtio net unpluged -22:29:46.803427+08:00 pcibus_reset: Assertion `bus->irq_count[i] == 0' failed. - -Here is stack info: -(gdb) bt -#0 0x00007f9a336795d7 in raise () from /usr/lib64/libc.so.6 -#1 0x00007f9a3367acc8 in abort () from /usr/lib64/libc.so.6 -#2 0x00007f9a33672546 in __assert_fail_base () from /usr/lib64/libc.so.6 -#3 0x00007f9a336725f2 in __assert_fail () from /usr/lib64/libc.so.6 -#4 0x0000000000641884 in pcibus_reset (qbus=0x29eee60) at hw/pci/pci.c:283 -#5 0x00000000005bfc30 in qbus_reset_one (bus=0x29eee60, opaque=) at hw/core/qdev.c:319 -#6 0x00000000005c1b19 in qdev_walk_children (dev=0x29ed2b0, pre_devfn=0x0, -pre_busfn=0x0, post_devfn=0x5c2440 ... -#7 0x00000000005c1c59 in qbus_walk_children (bus=0x2736f80, pre_devfn=0x0, -pre_busfn=0x0, post_devfn=0x5c2440 ... -#8 0x00000000005513f5 in qemu_devices_reset () at vl.c:1998 -#9 0x00000000004cab9d in pc_machine_reset () at -/home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/i386/pc.c:1976 -#10 0x000000000055148b in qemu_system_reset (address@hidden) at vl.c:2011 -#11 0x000000000055164f in main_loop_should_exit () at vl.c:2169 -#12 0x0000000000551719 in main_loop () at vl.c:2212 -#13 0x000000000041c9a8 in main (argc=, argv=, -envp=) at vl.c:5130 -(gdb) f 4 -... -(gdb) p bus->irq_count[0] -$6 = 1 - -Seems pci_update_irq_disabled doesn't work well - -can anyone help? - diff --git a/classification_output/01/instruction/9818783 b/classification_output/01/instruction/9818783 deleted file mode 100644 index a78585284..000000000 --- a/classification_output/01/instruction/9818783 +++ /dev/null @@ -1,308 +0,0 @@ -instruction: 0.985 -other: 0.985 -semantic: 0.984 -mistranslation: 0.983 - -[BUG][powerpc] KVM Guest Boot Failure – Hangs at "Booting Linux via __start()” - -Bug Description: -Encountering a boot failure when launching a KVM guest with -qemu-system-ppc64. The guest hangs at boot, and the QEMU monitor -crashes. 
-Reproduction Steps: -# qemu-system-ppc64 --version -QEMU emulator version 9.2.50 (v9.2.0-2799-g0462a32b4f) -Copyright (c) 2003-2025 Fabrice Bellard and the QEMU Project developers -# /usr/bin/qemu-system-ppc64 -name avocado-vt-vm1 -machine -pseries,accel=kvm \ --m 32768 -smp 32,sockets=1,cores=32,threads=1 -nographic \ - -device virtio-scsi-pci,id=scsi \ --drive -file=/home/kvmci/tests/data/avocado-vt/images/rhel8.0devel-ppc64le.qcow2,if=none,id=drive0,format=qcow2 -\ --device scsi-hd,drive=drive0,bus=scsi.0 \ - -netdev bridge,id=net0,br=virbr0 \ - -device virtio-net-pci,netdev=net0 \ - -serial pty \ - -device virtio-balloon-pci \ - -cpu host -QEMU 9.2.50 monitor - type 'help' for more information -char device redirected to /dev/pts/2 (label serial0) -(qemu) -(qemu) qemu-system-ppc64: warning: kernel_irqchip allowed but -unavailable: IRQ_XIVE capability must be present for KVM -Falling back to kernel-irqchip=off -** Qemu Hang - -(In another ssh session) -# screen /dev/pts/2 -Preparing to boot Linux version 6.10.4-200.fc40.ppc64le -(mockbuild@c23cc4e677614c34bb22d54eeea4dc1f) (gcc (GCC) 14.2.1 20240801 -(Red Hat 14.2.1-1), GNU ld version 2.41-37.fc40) #1 SMP Sun Aug 11 -15:20:17 UTC 2024 -Detected machine type: 0000000000000101 -command line: -BOOT_IMAGE=(ieee1275/disk,msdos2)/vmlinuz-6.10.4-200.fc40.ppc64le -root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root crashkernel=1024M -Max number of cores passed to firmware: 2048 (NR_CPUS = 2048) -Calling ibm,client-architecture-support... done -memory layout at init: - memory_limit : 0000000000000000 (16 MB aligned) - alloc_bottom : 0000000008200000 - alloc_top : 0000000030000000 - alloc_top_hi : 0000000800000000 - rmo_top : 0000000030000000 - ram_top : 0000000800000000 -instantiating rtas at 0x000000002fff0000... done -prom_hold_cpus: skipped -copying OF device tree... -Building dt strings... -Building dt structure... -Device tree strings 0x0000000008210000 -> 0x0000000008210bd0 -Device tree struct 0x0000000008220000 -> 0x0000000008230000 -Quiescing Open Firmware ... -Booting Linux via __start() @ 0x0000000000440000 ... -** Guest Console Hang - - -Git Bisect: -Performing git bisect points to the following patch: -# git bisect bad -e8291ec16da80566c121c68d9112be458954d90b is the first bad commit -commit e8291ec16da80566c121c68d9112be458954d90b (HEAD) -Author: Nicholas Piggin -Date: Thu Dec 19 13:40:31 2024 +1000 - - target/ppc: fix timebase register reset state -(H)DEC and PURR get reset before icount does, which causes them to -be -skewed and not match the init state. This can cause replay to not -match the recorded trace exactly. For DEC and HDEC this is usually -not -noticable since they tend to get programmed before affecting the - target machine. PURR has been observed to cause replay bugs when - running Linux. - - Fix this by resetting using a time of 0. - - Message-ID: <20241219034035.1826173-2-npiggin@gmail.com> - Signed-off-by: Nicholas Piggin - - hw/ppc/ppc.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - - -Reverting the patch helps boot the guest. -Thanks, -Misbah Anjum N - -Thanks for the report. - -Tricky problem. A secondary CPU is hanging before it is started by the -primary via rtas call. - -That secondary keeps calling kvm_cpu_exec(), which keeps exiting out -early with EXCP_HLT because kvm_arch_process_async_events() returns -true because that cpu has ->halted=1. That just goes around he run -loop because there is an interrupt pending (DEC). - -So it never runs. 
It also never releases the BQL, and another CPU, -the primary which is actually supposed to be running, is stuck in -spapr_set_all_lpcrs() in run_on_cpu() waiting for the BQL. - -This patch just exposes the bug I think, by causing the interrupt. -although I'm not quite sure why it's okay previously (-ve decrementer -values should be causing a timer exception too). The timer exception -should not be taken as an interrupt by those secondary CPUs, and it -doesn't because it is masked, until set_all_lpcrs sets an LPCR value -that enables powersave wakeup on decrementer interrupt. - -The start_powered_off sate just sets ->halted, which makes it look -like a powersaving state. Logically I think it's not the same thing -as far as spapr goes. I don't know why start_powered_off only sets -->halted, and not ->stop/stopped as well. - -Not sure how best to solve it cleanly. I'll send a revert if I can't -get something working soon. - -Thanks, -Nick - -On Tue Mar 18, 2025 at 7:09 AM AEST, misanjum wrote: -> -Bug Description: -> -Encountering a boot failure when launching a KVM guest with -> -qemu-system-ppc64. The guest hangs at boot, and the QEMU monitor -> -crashes. -> -> -> -Reproduction Steps: -> -# qemu-system-ppc64 --version -> -QEMU emulator version 9.2.50 (v9.2.0-2799-g0462a32b4f) -> -Copyright (c) 2003-2025 Fabrice Bellard and the QEMU Project developers -> -> -# /usr/bin/qemu-system-ppc64 -name avocado-vt-vm1 -machine -> -pseries,accel=kvm \ -> --m 32768 -smp 32,sockets=1,cores=32,threads=1 -nographic \ -> --device virtio-scsi-pci,id=scsi \ -> --drive -> -file=/home/kvmci/tests/data/avocado-vt/images/rhel8.0devel-ppc64le.qcow2,if=none,id=drive0,format=qcow2 -> -> -\ -> --device scsi-hd,drive=drive0,bus=scsi.0 \ -> --netdev bridge,id=net0,br=virbr0 \ -> --device virtio-net-pci,netdev=net0 \ -> --serial pty \ -> --device virtio-balloon-pci \ -> --cpu host -> -QEMU 9.2.50 monitor - type 'help' for more information -> -char device redirected to /dev/pts/2 (label serial0) -> -(qemu) -> -(qemu) qemu-system-ppc64: warning: kernel_irqchip allowed but -> -unavailable: IRQ_XIVE capability must be present for KVM -> -Falling back to kernel-irqchip=off -> -** Qemu Hang -> -> -(In another ssh session) -> -# screen /dev/pts/2 -> -Preparing to boot Linux version 6.10.4-200.fc40.ppc64le -> -(mockbuild@c23cc4e677614c34bb22d54eeea4dc1f) (gcc (GCC) 14.2.1 20240801 -> -(Red Hat 14.2.1-1), GNU ld version 2.41-37.fc40) #1 SMP Sun Aug 11 -> -15:20:17 UTC 2024 -> -Detected machine type: 0000000000000101 -> -command line: -> -BOOT_IMAGE=(ieee1275/disk,msdos2)/vmlinuz-6.10.4-200.fc40.ppc64le -> -root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root crashkernel=1024M -> -Max number of cores passed to firmware: 2048 (NR_CPUS = 2048) -> -Calling ibm,client-architecture-support... done -> -memory layout at init: -> -memory_limit : 0000000000000000 (16 MB aligned) -> -alloc_bottom : 0000000008200000 -> -alloc_top : 0000000030000000 -> -alloc_top_hi : 0000000800000000 -> -rmo_top : 0000000030000000 -> -ram_top : 0000000800000000 -> -instantiating rtas at 0x000000002fff0000... done -> -prom_hold_cpus: skipped -> -copying OF device tree... -> -Building dt strings... -> -Building dt structure... -> -Device tree strings 0x0000000008210000 -> 0x0000000008210bd0 -> -Device tree struct 0x0000000008220000 -> 0x0000000008230000 -> -Quiescing Open Firmware ... -> -Booting Linux via __start() @ 0x0000000000440000 ... 
-> -** Guest Console Hang -> -> -> -Git Bisect: -> -Performing git bisect points to the following patch: -> -# git bisect bad -> -e8291ec16da80566c121c68d9112be458954d90b is the first bad commit -> -commit e8291ec16da80566c121c68d9112be458954d90b (HEAD) -> -Author: Nicholas Piggin -> -Date: Thu Dec 19 13:40:31 2024 +1000 -> -> -target/ppc: fix timebase register reset state -> -> -(H)DEC and PURR get reset before icount does, which causes them to -> -be -> -skewed and not match the init state. This can cause replay to not -> -match the recorded trace exactly. For DEC and HDEC this is usually -> -not -> -noticable since they tend to get programmed before affecting the -> -target machine. PURR has been observed to cause replay bugs when -> -running Linux. -> -> -Fix this by resetting using a time of 0. -> -> -Message-ID: <20241219034035.1826173-2-npiggin@gmail.com> -> -Signed-off-by: Nicholas Piggin -> -> -hw/ppc/ppc.c | 11 ++++++++--- -> -1 file changed, 8 insertions(+), 3 deletions(-) -> -> -> -Reverting the patch helps boot the guest. -> -Thanks, -> -Misbah Anjum N - diff --git a/classification_output/01/mistranslation/0247400 b/classification_output/01/mistranslation/0247400 deleted file mode 100644 index 746a624cc..000000000 --- a/classification_output/01/mistranslation/0247400 +++ /dev/null @@ -1,1486 +0,0 @@ -mistranslation: 0.659 -instruction: 0.624 -semantic: 0.600 -other: 0.598 - -[Qemu-devel][bug] qemu crash when migrate vm and vm's disks - -When migrate vm and vm’s disks target host qemu crash due to an invalid free. -#0  object_unref (obj=0x1000) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 -#1  0x0000560434d79e79 in memory_region_unref (mr=) -at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 -#2  flatview_destroy (view=0x560439653880) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 -#3  0x000056043514dfbe in call_rcu_thread (opaque=) -at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 -#4  0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 -#5  0x00007fbc2b099bad in clone () from /lib64/libc.so.6 -test base qemu-2.12.0 -, -but use lastest qemu(v6.0.0-rc2) also reproduce. -As follow patch can resolve this problem: -https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html -Steps to reproduce: -(1) Create VM (virsh define) -(2) Add 64 virtio scsi disks -(3) migrate vm and vm’disks -------------------------------------------------------------------------------------------------------------------------------------- -本邮件及其附件含有新华三集团的保密信息,仅限于发送给上面地址中列出 -的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、 -或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本 -邮件! -This e-mail and its attachments contain confidential information from New H3C, which is -intended only for the person or entity whose address is listed above. Any use of the -information contained herein in any way (including, but not limited to, total or partial -disclosure, reproduction, or dissemination) by persons other than the intended -recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender -by phone or email immediately and delete it! - -* Yuchen (yu.chen@h3c.com) wrote: -> -When migrate vm and vm’s disks target host qemu crash due to an invalid free. 
-> -> -#0 object_unref (obj=0x1000) at -> -/qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 -> -#1 0x0000560434d79e79 in memory_region_unref (mr=) -> -at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 -> -#2 flatview_destroy (view=0x560439653880) at -> -/qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 -> -#3 0x000056043514dfbe in call_rcu_thread (opaque=) -> -at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 -> -#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 -> -#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 -> -> -test base qemu-2.12.0,but use lastest qemu(v6.0.0-rc2) also reproduce. -Interesting. - -> -As follow patch can resolve this problem: -> -https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html -That's a pci/rcu change; ccing Paolo and Micahel. - -> -Steps to reproduce: -> -(1) Create VM (virsh define) -> -(2) Add 64 virtio scsi disks -Is that hot adding the disks later, or are they included in the VM at -creation? -Can you provide a libvirt XML example? - -> -(3) migrate vm and vm’disks -What do you mean by 'and vm disks' - are you doing a block migration? - -Dave - -> -------------------------------------------------------------------------------------------------------------------------------------- -> -本邮件及其附件含有新华三集团的保密信息,仅限于发送给上面地址中列出 -> -的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、 -> -或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本 -> -邮件! -> -This e-mail and its attachments contain confidential information from New -> -H3C, which is -> -intended only for the person or entity whose address is listed above. Any use -> -of the -> -information contained herein in any way (including, but not limited to, total -> -or partial -> -disclosure, reproduction, or dissemination) by persons other than the intended -> -recipient(s) is prohibited. If you receive this e-mail in error, please -> -notify the sender -> -by phone or email immediately and delete it! --- -Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK - -> ------邮件原件----- -> -发件人: Dr. David Alan Gilbert [ -mailto:dgilbert@redhat.com -] -> -发送时间: 2021å¹´4月8日 19:27 -> -收件人: yuchen (Cloud) ; pbonzini@redhat.com; -> -mst@redhat.com -> -抄送: qemu-devel@nongnu.org -> -主题: Re: [Qemu-devel][bug] qemu crash when migrate vm and vm's disks -> -> -* Yuchen (yu.chen@h3c.com) wrote: -> -> When migrate vm and vm’s disks target host qemu crash due to an invalid -> -free. -> -> -> -> #0 object_unref (obj=0x1000) at -> -> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 -> -> #1 0x0000560434d79e79 in memory_region_unref (mr=) -> -> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 -> -> #2 flatview_destroy (view=0x560439653880) at -> -> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 -> -> #3 0x000056043514dfbe in call_rcu_thread (opaque=) -> -> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 -> -> #4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 -> -> #5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 -> -> -> -> test base qemu-2.12.0,but use lastest qemu(v6.0.0-rc2) also reproduce. -> -> -Interesting. -> -> -> As follow patch can resolve this problem: -> -> -https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html -> -> -That's a pci/rcu change; ccing Paolo and Micahel. -> -> -> Steps to reproduce: -> -> (1) Create VM (virsh define) -> -> (2) Add 64 virtio scsi disks -> -> -Is that hot adding the disks later, or are they included in the VM at -> -creation? -> -Can you provide a libvirt XML example? 
-> -Include disks in the VM at creation - -vm disks xml (only virtio scsi disks): - - - - - -
[libvirt disk XML elided: the virtio-scsi <disk> definitions were stripped of their XML tags in the mail archive, leaving only blank residue here.]
-vm disks xml (only virtio disks):
[libvirt disk XML elided: the virtio <disk> definitions were likewise stripped of their XML tags in the mail archive.]
- - - -> -> (3) migrate vm and vm’disks -> -> -What do you mean by 'and vm disks' - are you doing a block migration? -> -Yes, block migration. -In fact, only migration domain also reproduced. - -> -Dave -> -> -> ---------------------------------------------------------------------- -> -> --------------------------------------------------------------- -> -Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK -------------------------------------------------------------------------------------------------------------------------------------- -本邮件及其附件含有新华三集团的保密信息,仅限于发送给上面地址中列出 -的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、 -或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本 -邮件! -This e-mail and its attachments contain confidential information from New H3C, -which is -intended only for the person or entity whose address is listed above. Any use -of the -information contained herein in any way (including, but not limited to, total -or partial -disclosure, reproduction, or dissemination) by persons other than the intended -recipient(s) is prohibited. If you receive this e-mail in error, please notify -the sender -by phone or email immediately and delete it! - diff --git a/classification_output/01/mistranslation/1267916 b/classification_output/01/mistranslation/1267916 deleted file mode 100644 index fffafcf77..000000000 --- a/classification_output/01/mistranslation/1267916 +++ /dev/null @@ -1,1878 +0,0 @@ -mistranslation: 0.927 -instruction: 0.903 -semantic: 0.891 -other: 0.877 - -[Qemu-devel] [TCG only][Migration Bug? ] Occasionally, the content of VM's memory is inconsistent between Source and Destination of migration - -Hi all, - -Does anyboday remember the similar issue post by hailiang months ago -http://patchwork.ozlabs.org/patch/454322/ -At least tow bugs about migration had been fixed since that. -And now we found the same issue at the tcg vm(kvm is fine), after -migration, the content VM's memory is inconsistent. -we add a patch to check memory content, you can find it from affix - -steps to reporduce: -1) apply the patch and re-build qemu -2) prepare the ubuntu guest and run memtest in grub. -soruce side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off -destination side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 -3) start migration -with 1000M NIC, migration will finish within 3 min. - -at source: -(qemu) migrate tcp:192.168.2.66:8881 -after saving ram complete -e9e725df678d392b1a83b3a917f332bb -qemu-system-x86_64: end ram md5 -(qemu) - -at destination: -...skip... 
-Completed load of VM with exit code 0 seq iteration 1264 -Completed load of VM with exit code 0 seq iteration 1265 -Completed load of VM with exit code 0 seq iteration 1266 -qemu-system-x86_64: after loading state section id 2(ram) -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 -qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init - -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 - -This occurs occasionally and only at tcg machine. It seems that -some pages dirtied in source side don't transferred to destination. -This problem can be reproduced even if we disable virtio. -Is it OK for some pages that not transferred to destination when do -migration ? Or is it a bug? -Any idea... - -=================md5 check patch============================= - -diff --git a/Makefile.target b/Makefile.target -index 962d004..e2cb8e9 100644 ---- a/Makefile.target -+++ b/Makefile.target -@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o - obj-y += memory_mapping.o - obj-y += dump.o - obj-y += migration/ram.o migration/savevm.o --LIBS := $(libs_softmmu) $(LIBS) -+LIBS := $(libs_softmmu) $(LIBS) -lplumb - - # xen support - obj-$(CONFIG_XEN) += xen-common.o -diff --git a/migration/ram.c b/migration/ram.c -index 1eb155a..3b7a09d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int -version_id) -} - - rcu_read_unlock(); -- DPRINTF("Completed load of VM with exit code %d seq iteration " -+ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " - "%" PRIu64 "\n", ret, seq_iter); - return ret; - } -diff --git a/migration/savevm.c b/migration/savevm.c -index 0ad1b93..3feaa61 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) - - } - -+#include "exec/ram_addr.h" -+#include "qemu/rcu_queue.h" -+#include -+#ifndef MD5_DIGEST_LENGTH -+#define MD5_DIGEST_LENGTH 16 -+#endif -+ -+static void check_host_md5(void) -+{ -+ int i; -+ unsigned char md[MD5_DIGEST_LENGTH]; -+ rcu_read_lock(); -+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check -'pc.ram' block */ -+ rcu_read_unlock(); -+ -+ MD5(block->host, block->used_length, md); -+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { -+ fprintf(stderr, "%02x", md[i]); -+ } -+ fprintf(stderr, "\n"); -+ error_report("end ram md5"); -+} -+ - void qemu_savevm_state_begin(QEMUFile *f, - const MigrationParams *params) - { -@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile -*f, bool iterable_only) -save_section_header(f, se, QEMU_VM_SECTION_END); - - ret = se->ops->save_live_complete_precopy(f, se->opaque); -+ -+ fprintf(stderr, "after saving %s complete\n", se->idstr); -+ check_host_md5(); -+ - trace_savevm_section_end(se->idstr, se->section_id, ret); - save_section_footer(f, se); - if (ret < 0) { -@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, -MigrationIncomingState *mis) -section_id, le->se->idstr); - return ret; - } -+ if (section_type == QEMU_VM_SECTION_END) { -+ error_report("after loading state section id %d(%s)", -+ section_id, le->se->idstr); -+ check_host_md5(); -+ } - if (!check_section_footer(f, le)) { - return -EINVAL; - } -@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) - } - - cpu_synchronize_all_post_init(); -+ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); -+ check_host_md5(); - - return ret; - } - -* Li Zhijian (address@hidden) wrote: -> -Hi all, -> -> -Does anyboday 
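The bare "#include" line in the patch above has lost its argument; MD5() and MD5_DIGEST_LENGTH are normally provided by OpenSSL, so the missing header is presumably <openssl/md5.h> (in which case the link step would typically also need -lcrypto). Under that assumption, a self-contained reading of the helper is:

#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include <openssl/md5.h>        /* assumed: declares MD5() and MD5_DIGEST_LENGTH */
#ifndef MD5_DIGEST_LENGTH
#define MD5_DIGEST_LENGTH 16
#endif

/* Hash the first RAMBlock ("pc.ram") and print the digest to stderr so the
 * source and destination sides can be compared around migration. */
static void check_host_md5(void)
{
    int i;
    unsigned char md[MD5_DIGEST_LENGTH];
    RAMBlock *block;

    rcu_read_lock();
    block = QLIST_FIRST_RCU(&ram_list.blocks);  /* only checks the 'pc.ram' block */
    rcu_read_unlock();

    MD5(block->host, block->used_length, md);
    for (i = 0; i < MD5_DIGEST_LENGTH; i++) {
        fprintf(stderr, "%02x", md[i]);
    }
    fprintf(stderr, "\n");
    error_report("end ram md5");
}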
remember the similar issue post by hailiang months ago -> -http://patchwork.ozlabs.org/patch/454322/ -> -At least tow bugs about migration had been fixed since that. -Yes, I wondered what happened to that. - -> -And now we found the same issue at the tcg vm(kvm is fine), after migration, -> -the content VM's memory is inconsistent. -Hmm, TCG only - I don't know much about that; but I guess something must -be accessing memory without using the proper macros/functions so -it doesn't mark it as dirty. - -> -we add a patch to check memory content, you can find it from affix -> -> -steps to reporduce: -> -1) apply the patch and re-build qemu -> -2) prepare the ubuntu guest and run memtest in grub. -> -soruce side: -> -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -> -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -> -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -> -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -> --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -> -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -> -pc-i440fx-2.3,accel=tcg,usb=off -> -> -destination side: -> -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -> -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -> -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -> -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -> --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -> -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -> -pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 -> -> -3) start migration -> -with 1000M NIC, migration will finish within 3 min. -> -> -at source: -> -(qemu) migrate tcp:192.168.2.66:8881 -> -after saving ram complete -> -e9e725df678d392b1a83b3a917f332bb -> -qemu-system-x86_64: end ram md5 -> -(qemu) -> -> -at destination: -> -...skip... -> -Completed load of VM with exit code 0 seq iteration 1264 -> -Completed load of VM with exit code 0 seq iteration 1265 -> -Completed load of VM with exit code 0 seq iteration 1266 -> -qemu-system-x86_64: after loading state section id 2(ram) -> -49c2dac7bde0e5e22db7280dcb3824f9 -> -qemu-system-x86_64: end ram md5 -> -qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init -> -> -49c2dac7bde0e5e22db7280dcb3824f9 -> -qemu-system-x86_64: end ram md5 -> -> -This occurs occasionally and only at tcg machine. It seems that -> -some pages dirtied in source side don't transferred to destination. -> -This problem can be reproduced even if we disable virtio. -> -> -Is it OK for some pages that not transferred to destination when do -> -migration ? Or is it a bug? -I'm pretty sure that means it's a bug. Hard to find though, I guess -at least memtest is smaller than a big OS. I think I'd dump the whole -of memory on both sides, hexdump and diff them - I'd guess it would -just be one byte/word different, maybe that would offer some idea what -wrote it. - -Dave - -> -Any idea... 
-> -> -=================md5 check patch============================= -> -> -diff --git a/Makefile.target b/Makefile.target -> -index 962d004..e2cb8e9 100644 -> ---- a/Makefile.target -> -+++ b/Makefile.target -> -@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o -> -obj-y += memory_mapping.o -> -obj-y += dump.o -> -obj-y += migration/ram.o migration/savevm.o -> --LIBS := $(libs_softmmu) $(LIBS) -> -+LIBS := $(libs_softmmu) $(LIBS) -lplumb -> -> -# xen support -> -obj-$(CONFIG_XEN) += xen-common.o -> -diff --git a/migration/ram.c b/migration/ram.c -> -index 1eb155a..3b7a09d 100644 -> ---- a/migration/ram.c -> -+++ b/migration/ram.c -> -@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int -> -version_id) -> -} -> -> -rcu_read_unlock(); -> -- DPRINTF("Completed load of VM with exit code %d seq iteration " -> -+ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " -> -"%" PRIu64 "\n", ret, seq_iter); -> -return ret; -> -} -> -diff --git a/migration/savevm.c b/migration/savevm.c -> -index 0ad1b93..3feaa61 100644 -> ---- a/migration/savevm.c -> -+++ b/migration/savevm.c -> -@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) -> -> -} -> -> -+#include "exec/ram_addr.h" -> -+#include "qemu/rcu_queue.h" -> -+#include -> -+#ifndef MD5_DIGEST_LENGTH -> -+#define MD5_DIGEST_LENGTH 16 -> -+#endif -> -+ -> -+static void check_host_md5(void) -> -+{ -> -+ int i; -> -+ unsigned char md[MD5_DIGEST_LENGTH]; -> -+ rcu_read_lock(); -> -+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check -> -'pc.ram' block */ -> -+ rcu_read_unlock(); -> -+ -> -+ MD5(block->host, block->used_length, md); -> -+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { -> -+ fprintf(stderr, "%02x", md[i]); -> -+ } -> -+ fprintf(stderr, "\n"); -> -+ error_report("end ram md5"); -> -+} -> -+ -> -void qemu_savevm_state_begin(QEMUFile *f, -> -const MigrationParams *params) -> -{ -> -@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, -> -bool iterable_only) -> -save_section_header(f, se, QEMU_VM_SECTION_END); -> -> -ret = se->ops->save_live_complete_precopy(f, se->opaque); -> -+ -> -+ fprintf(stderr, "after saving %s complete\n", se->idstr); -> -+ check_host_md5(); -> -+ -> -trace_savevm_section_end(se->idstr, se->section_id, ret); -> -save_section_footer(f, se); -> -if (ret < 0) { -> -@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, -> -MigrationIncomingState *mis) -> -section_id, le->se->idstr); -> -return ret; -> -} -> -+ if (section_type == QEMU_VM_SECTION_END) { -> -+ error_report("after loading state section id %d(%s)", -> -+ section_id, le->se->idstr); -> -+ check_host_md5(); -> -+ } -> -if (!check_section_footer(f, le)) { -> -return -EINVAL; -> -} -> -@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) -> -} -> -> -cpu_synchronize_all_post_init(); -> -+ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); -> -+ check_host_md5(); -> -> -return ret; -> -} -> -> -> --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - -On 2015/12/3 17:24, Dr. David Alan Gilbert wrote: -* Li Zhijian (address@hidden) wrote: -Hi all, - -Does anyboday remember the similar issue post by hailiang months ago -http://patchwork.ozlabs.org/patch/454322/ -At least tow bugs about migration had been fixed since that. -Yes, I wondered what happened to that. -And now we found the same issue at the tcg vm(kvm is fine), after migration, -the content VM's memory is inconsistent. 
-Hmm, TCG only - I don't know much about that; but I guess something must -be accessing memory without using the proper macros/functions so -it doesn't mark it as dirty. -we add a patch to check memory content, you can find it from affix - -steps to reporduce: -1) apply the patch and re-build qemu -2) prepare the ubuntu guest and run memtest in grub. -soruce side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off - -destination side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 - -3) start migration -with 1000M NIC, migration will finish within 3 min. - -at source: -(qemu) migrate tcp:192.168.2.66:8881 -after saving ram complete -e9e725df678d392b1a83b3a917f332bb -qemu-system-x86_64: end ram md5 -(qemu) - -at destination: -...skip... -Completed load of VM with exit code 0 seq iteration 1264 -Completed load of VM with exit code 0 seq iteration 1265 -Completed load of VM with exit code 0 seq iteration 1266 -qemu-system-x86_64: after loading state section id 2(ram) -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 -qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init - -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 - -This occurs occasionally and only at tcg machine. It seems that -some pages dirtied in source side don't transferred to destination. -This problem can be reproduced even if we disable virtio. - -Is it OK for some pages that not transferred to destination when do -migration ? Or is it a bug? -I'm pretty sure that means it's a bug. Hard to find though, I guess -at least memtest is smaller than a big OS. I think I'd dump the whole -of memory on both sides, hexdump and diff them - I'd guess it would -just be one byte/word different, maybe that would offer some idea what -wrote it. -Maybe one better way to do that is with the help of userfaultfd's write-protect -capability. It is still in the development by Andrea Arcangeli, but there -is a RFC version available, please refer to -http://www.spinics.net/lists/linux-mm/msg97422.html -(I'm developing live memory snapshot which based on it, maybe this is another -scene where we -can use userfaultfd's WP ;) ). -Dave -Any idea... 
- -=================md5 check patch============================= - -diff --git a/Makefile.target b/Makefile.target -index 962d004..e2cb8e9 100644 ---- a/Makefile.target -+++ b/Makefile.target -@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o - obj-y += memory_mapping.o - obj-y += dump.o - obj-y += migration/ram.o migration/savevm.o --LIBS := $(libs_softmmu) $(LIBS) -+LIBS := $(libs_softmmu) $(LIBS) -lplumb - - # xen support - obj-$(CONFIG_XEN) += xen-common.o -diff --git a/migration/ram.c b/migration/ram.c -index 1eb155a..3b7a09d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int -version_id) - } - - rcu_read_unlock(); -- DPRINTF("Completed load of VM with exit code %d seq iteration " -+ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " - "%" PRIu64 "\n", ret, seq_iter); - return ret; - } -diff --git a/migration/savevm.c b/migration/savevm.c -index 0ad1b93..3feaa61 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) - - } - -+#include "exec/ram_addr.h" -+#include "qemu/rcu_queue.h" -+#include -+#ifndef MD5_DIGEST_LENGTH -+#define MD5_DIGEST_LENGTH 16 -+#endif -+ -+static void check_host_md5(void) -+{ -+ int i; -+ unsigned char md[MD5_DIGEST_LENGTH]; -+ rcu_read_lock(); -+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check -'pc.ram' block */ -+ rcu_read_unlock(); -+ -+ MD5(block->host, block->used_length, md); -+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { -+ fprintf(stderr, "%02x", md[i]); -+ } -+ fprintf(stderr, "\n"); -+ error_report("end ram md5"); -+} -+ - void qemu_savevm_state_begin(QEMUFile *f, - const MigrationParams *params) - { -@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, -bool iterable_only) - save_section_header(f, se, QEMU_VM_SECTION_END); - - ret = se->ops->save_live_complete_precopy(f, se->opaque); -+ -+ fprintf(stderr, "after saving %s complete\n", se->idstr); -+ check_host_md5(); -+ - trace_savevm_section_end(se->idstr, se->section_id, ret); - save_section_footer(f, se); - if (ret < 0) { -@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, -MigrationIncomingState *mis) - section_id, le->se->idstr); - return ret; - } -+ if (section_type == QEMU_VM_SECTION_END) { -+ error_report("after loading state section id %d(%s)", -+ section_id, le->se->idstr); -+ check_host_md5(); -+ } - if (!check_section_footer(f, le)) { - return -EINVAL; - } -@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) - } - - cpu_synchronize_all_post_init(); -+ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); -+ check_host_md5(); - - return ret; - } --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - -. - -On 12/03/2015 05:37 PM, Hailiang Zhang wrote: -On 2015/12/3 17:24, Dr. David Alan Gilbert wrote: -* Li Zhijian (address@hidden) wrote: -Hi all, - -Does anyboday remember the similar issue post by hailiang months ago -http://patchwork.ozlabs.org/patch/454322/ -At least tow bugs about migration had been fixed since that. -Yes, I wondered what happened to that. -And now we found the same issue at the tcg vm(kvm is fine), after -migration, -the content VM's memory is inconsistent. -Hmm, TCG only - I don't know much about that; but I guess something must -be accessing memory without using the proper macros/functions so -it doesn't mark it as dirty. 
-we add a patch to check memory content, you can find it from affix - -steps to reporduce: -1) apply the patch and re-build qemu -2) prepare the ubuntu guest and run memtest in grub. -soruce side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 - --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off - -destination side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 - --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 - -3) start migration -with 1000M NIC, migration will finish within 3 min. - -at source: -(qemu) migrate tcp:192.168.2.66:8881 -after saving ram complete -e9e725df678d392b1a83b3a917f332bb -qemu-system-x86_64: end ram md5 -(qemu) - -at destination: -...skip... -Completed load of VM with exit code 0 seq iteration 1264 -Completed load of VM with exit code 0 seq iteration 1265 -Completed load of VM with exit code 0 seq iteration 1266 -qemu-system-x86_64: after loading state section id 2(ram) -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 -qemu-system-x86_64: qemu_loadvm_state: after -cpu_synchronize_all_post_init - -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 - -This occurs occasionally and only at tcg machine. It seems that -some pages dirtied in source side don't transferred to destination. -This problem can be reproduced even if we disable virtio. - -Is it OK for some pages that not transferred to destination when do -migration ? Or is it a bug? -I'm pretty sure that means it's a bug. Hard to find though, I guess -at least memtest is smaller than a big OS. I think I'd dump the whole -of memory on both sides, hexdump and diff them - I'd guess it would -just be one byte/word different, maybe that would offer some idea what -wrote it. -Maybe one better way to do that is with the help of userfaultfd's -write-protect -capability. It is still in the development by Andrea Arcangeli, but there -is a RFC version available, please refer to -http://www.spinics.net/lists/linux-mm/msg97422.html -(I'm developing live memory snapshot which based on it, maybe this is -another scene where we -can use userfaultfd's WP ;) ). -sounds good. - -thanks -Li -Dave -Any idea... 
- -=================md5 check patch============================= - -diff --git a/Makefile.target b/Makefile.target -index 962d004..e2cb8e9 100644 ---- a/Makefile.target -+++ b/Makefile.target -@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o - obj-y += memory_mapping.o - obj-y += dump.o - obj-y += migration/ram.o migration/savevm.o --LIBS := $(libs_softmmu) $(LIBS) -+LIBS := $(libs_softmmu) $(LIBS) -lplumb - - # xen support - obj-$(CONFIG_XEN) += xen-common.o -diff --git a/migration/ram.c b/migration/ram.c -index 1eb155a..3b7a09d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int -version_id) - } - - rcu_read_unlock(); -- DPRINTF("Completed load of VM with exit code %d seq iteration " -+ fprintf(stderr, "Completed load of VM with exit code %d seq -iteration " - "%" PRIu64 "\n", ret, seq_iter); - return ret; - } -diff --git a/migration/savevm.c b/migration/savevm.c -index 0ad1b93..3feaa61 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) - - } - -+#include "exec/ram_addr.h" -+#include "qemu/rcu_queue.h" -+#include -+#ifndef MD5_DIGEST_LENGTH -+#define MD5_DIGEST_LENGTH 16 -+#endif -+ -+static void check_host_md5(void) -+{ -+ int i; -+ unsigned char md[MD5_DIGEST_LENGTH]; -+ rcu_read_lock(); -+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check -'pc.ram' block */ -+ rcu_read_unlock(); -+ -+ MD5(block->host, block->used_length, md); -+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { -+ fprintf(stderr, "%02x", md[i]); -+ } -+ fprintf(stderr, "\n"); -+ error_report("end ram md5"); -+} -+ - void qemu_savevm_state_begin(QEMUFile *f, - const MigrationParams *params) - { -@@ -1056,6 +1079,10 @@ void -qemu_savevm_state_complete_precopy(QEMUFile *f, -bool iterable_only) - save_section_header(f, se, QEMU_VM_SECTION_END); - - ret = se->ops->save_live_complete_precopy(f, se->opaque); -+ -+ fprintf(stderr, "after saving %s complete\n", se->idstr); -+ check_host_md5(); -+ - trace_savevm_section_end(se->idstr, se->section_id, ret); - save_section_footer(f, se); - if (ret < 0) { -@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, -MigrationIncomingState *mis) - section_id, le->se->idstr); - return ret; - } -+ if (section_type == QEMU_VM_SECTION_END) { -+ error_report("after loading state section id %d(%s)", -+ section_id, le->se->idstr); -+ check_host_md5(); -+ } - if (!check_section_footer(f, le)) { - return -EINVAL; - } -@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) - } - - cpu_synchronize_all_post_init(); -+ error_report("%s: after cpu_synchronize_all_post_init\n", -__func__); -+ check_host_md5(); - - return ret; - } --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - -. -. --- -Best regards. -Li Zhijian (8555) - -On 12/03/2015 05:24 PM, Dr. David Alan Gilbert wrote: -* Li Zhijian (address@hidden) wrote: -Hi all, - -Does anyboday remember the similar issue post by hailiang months ago -http://patchwork.ozlabs.org/patch/454322/ -At least tow bugs about migration had been fixed since that. -Yes, I wondered what happened to that. -And now we found the same issue at the tcg vm(kvm is fine), after migration, -the content VM's memory is inconsistent. -Hmm, TCG only - I don't know much about that; but I guess something must -be accessing memory without using the proper macros/functions so -it doesn't mark it as dirty. 
-we add a patch to check memory content, you can find it from affix - -steps to reporduce: -1) apply the patch and re-build qemu -2) prepare the ubuntu guest and run memtest in grub. -soruce side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off - -destination side: -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 - -3) start migration -with 1000M NIC, migration will finish within 3 min. - -at source: -(qemu) migrate tcp:192.168.2.66:8881 -after saving ram complete -e9e725df678d392b1a83b3a917f332bb -qemu-system-x86_64: end ram md5 -(qemu) - -at destination: -...skip... -Completed load of VM with exit code 0 seq iteration 1264 -Completed load of VM with exit code 0 seq iteration 1265 -Completed load of VM with exit code 0 seq iteration 1266 -qemu-system-x86_64: after loading state section id 2(ram) -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 -qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init - -49c2dac7bde0e5e22db7280dcb3824f9 -qemu-system-x86_64: end ram md5 - -This occurs occasionally and only at tcg machine. It seems that -some pages dirtied in source side don't transferred to destination. -This problem can be reproduced even if we disable virtio. - -Is it OK for some pages that not transferred to destination when do -migration ? Or is it a bug? -I'm pretty sure that means it's a bug. Hard to find though, I guess -at least memtest is smaller than a big OS. I think I'd dump the whole -of memory on both sides, hexdump and diff them - I'd guess it would -just be one byte/word different, maybe that would offer some idea what -wrote it. -I try to dump and compare them, more than 10 pages are different. -in source side, they are random value rather than always 'FF' 'FB' 'EF' -'BF'... in destination. -and not all of the different pages are continuous. - -thanks -Li -Dave -Any idea... 
- -=================md5 check patch============================= - -diff --git a/Makefile.target b/Makefile.target -index 962d004..e2cb8e9 100644 ---- a/Makefile.target -+++ b/Makefile.target -@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o - obj-y += memory_mapping.o - obj-y += dump.o - obj-y += migration/ram.o migration/savevm.o --LIBS := $(libs_softmmu) $(LIBS) -+LIBS := $(libs_softmmu) $(LIBS) -lplumb - - # xen support - obj-$(CONFIG_XEN) += xen-common.o -diff --git a/migration/ram.c b/migration/ram.c -index 1eb155a..3b7a09d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int -version_id) - } - - rcu_read_unlock(); -- DPRINTF("Completed load of VM with exit code %d seq iteration " -+ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " - "%" PRIu64 "\n", ret, seq_iter); - return ret; - } -diff --git a/migration/savevm.c b/migration/savevm.c -index 0ad1b93..3feaa61 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) - - } - -+#include "exec/ram_addr.h" -+#include "qemu/rcu_queue.h" -+#include -+#ifndef MD5_DIGEST_LENGTH -+#define MD5_DIGEST_LENGTH 16 -+#endif -+ -+static void check_host_md5(void) -+{ -+ int i; -+ unsigned char md[MD5_DIGEST_LENGTH]; -+ rcu_read_lock(); -+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check -'pc.ram' block */ -+ rcu_read_unlock(); -+ -+ MD5(block->host, block->used_length, md); -+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { -+ fprintf(stderr, "%02x", md[i]); -+ } -+ fprintf(stderr, "\n"); -+ error_report("end ram md5"); -+} -+ - void qemu_savevm_state_begin(QEMUFile *f, - const MigrationParams *params) - { -@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, -bool iterable_only) - save_section_header(f, se, QEMU_VM_SECTION_END); - - ret = se->ops->save_live_complete_precopy(f, se->opaque); -+ -+ fprintf(stderr, "after saving %s complete\n", se->idstr); -+ check_host_md5(); -+ - trace_savevm_section_end(se->idstr, se->section_id, ret); - save_section_footer(f, se); - if (ret < 0) { -@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, -MigrationIncomingState *mis) - section_id, le->se->idstr); - return ret; - } -+ if (section_type == QEMU_VM_SECTION_END) { -+ error_report("after loading state section id %d(%s)", -+ section_id, le->se->idstr); -+ check_host_md5(); -+ } - if (!check_section_footer(f, le)) { - return -EINVAL; - } -@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) - } - - cpu_synchronize_all_post_init(); -+ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); -+ check_host_md5(); - - return ret; - } --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - - -. --- -Best regards. -Li Zhijian (8555) - -* Li Zhijian (address@hidden) wrote: -> -> -> -On 12/03/2015 05:24 PM, Dr. David Alan Gilbert wrote: -> ->* Li Zhijian (address@hidden) wrote: -> ->>Hi all, -> ->> -> ->>Does anyboday remember the similar issue post by hailiang months ago -> ->> -http://patchwork.ozlabs.org/patch/454322/ -> ->>At least tow bugs about migration had been fixed since that. -> -> -> ->Yes, I wondered what happened to that. -> -> -> ->>And now we found the same issue at the tcg vm(kvm is fine), after migration, -> ->>the content VM's memory is inconsistent. 
-> -> -> ->Hmm, TCG only - I don't know much about that; but I guess something must -> ->be accessing memory without using the proper macros/functions so -> ->it doesn't mark it as dirty. -> -> -> ->>we add a patch to check memory content, you can find it from affix -> ->> -> ->>steps to reporduce: -> ->>1) apply the patch and re-build qemu -> ->>2) prepare the ubuntu guest and run memtest in grub. -> ->>soruce side: -> ->>x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -> ->>e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -> ->>if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -> ->>virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -> ->>-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -> ->>tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -> ->>pc-i440fx-2.3,accel=tcg,usb=off -> ->> -> ->>destination side: -> ->>x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -> ->>e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -> ->>if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -> ->>virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -> ->>-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -> ->>tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -> ->>pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 -> ->> -> ->>3) start migration -> ->>with 1000M NIC, migration will finish within 3 min. -> ->> -> ->>at source: -> ->>(qemu) migrate tcp:192.168.2.66:8881 -> ->>after saving ram complete -> ->>e9e725df678d392b1a83b3a917f332bb -> ->>qemu-system-x86_64: end ram md5 -> ->>(qemu) -> ->> -> ->>at destination: -> ->>...skip... -> ->>Completed load of VM with exit code 0 seq iteration 1264 -> ->>Completed load of VM with exit code 0 seq iteration 1265 -> ->>Completed load of VM with exit code 0 seq iteration 1266 -> ->>qemu-system-x86_64: after loading state section id 2(ram) -> ->>49c2dac7bde0e5e22db7280dcb3824f9 -> ->>qemu-system-x86_64: end ram md5 -> ->>qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init -> ->> -> ->>49c2dac7bde0e5e22db7280dcb3824f9 -> ->>qemu-system-x86_64: end ram md5 -> ->> -> ->>This occurs occasionally and only at tcg machine. It seems that -> ->>some pages dirtied in source side don't transferred to destination. -> ->>This problem can be reproduced even if we disable virtio. -> ->> -> ->>Is it OK for some pages that not transferred to destination when do -> ->>migration ? Or is it a bug? -> -> -> ->I'm pretty sure that means it's a bug. Hard to find though, I guess -> ->at least memtest is smaller than a big OS. I think I'd dump the whole -> ->of memory on both sides, hexdump and diff them - I'd guess it would -> ->just be one byte/word different, maybe that would offer some idea what -> ->wrote it. -> -> -I try to dump and compare them, more than 10 pages are different. -> -in source side, they are random value rather than always 'FF' 'FB' 'EF' -> -'BF'... in destination. -> -> -and not all of the different pages are continuous. -I wonder if it happens on all of memtest's different test patterns, -perhaps it might be possible to narrow it down if you tell memtest -to only run one test at a time. - -Dave - -> -> -thanks -> -Li -> -> -> -> -> ->Dave -> -> -> ->>Any idea... 
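Li reports above that more than ten pages differ between the source and destination dumps, and later in the thread Amit asks for a script that reports how many pages differ and by how many bytes. A minimal sketch of such a comparison tool follows; it assumes both sides' RAM has been dumped to flat files of equal size (for example with the monitor's pmemsave command), and the file names and 4 KiB page size are only illustrative:

/* ramdiff.c - compare two flat RAM dumps page by page and report statistics. */
#include <stdio.h>

#define PAGE_SIZE 4096

int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "usage: %s src.dump dst.dump\n", argv[0]);
        return 1;
    }
    FILE *a = fopen(argv[1], "rb");
    FILE *b = fopen(argv[2], "rb");
    if (!a || !b) {
        perror("fopen");
        return 1;
    }
    unsigned char pa[PAGE_SIZE], pb[PAGE_SIZE];
    long page = 0, bad_pages = 0, bad_bytes = 0;
    size_t ra, rb;
    /* Stops at the first short/uneven read; equal-sized dumps are assumed. */
    while ((ra = fread(pa, 1, PAGE_SIZE, a)) > 0 &&
           (rb = fread(pb, 1, PAGE_SIZE, b)) == ra) {
        long diff = 0;
        for (size_t i = 0; i < ra; i++) {
            if (pa[i] != pb[i]) {
                diff++;
            }
        }
        if (diff) {
            printf("page %ld: %ld byte(s) differ\n", page, diff);
            bad_pages++;
            bad_bytes += diff;
        }
        page++;
    }
    printf("%ld differing page(s), %.1f differing byte(s) per bad page on average\n",
           bad_pages, bad_pages ? (double)bad_bytes / bad_pages : 0.0);
    fclose(a);
    fclose(b);
    return 0;
}

Running it against the two dumps would show whether the corruption clusters in a few pages or is spread out, which may help correlate the addresses with what the guest (memtest) was writing at the time.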
-> ->> -> ->>=================md5 check patch============================= -> ->> -> ->>diff --git a/Makefile.target b/Makefile.target -> ->>index 962d004..e2cb8e9 100644 -> ->>--- a/Makefile.target -> ->>+++ b/Makefile.target -> ->>@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o -> ->> obj-y += memory_mapping.o -> ->> obj-y += dump.o -> ->> obj-y += migration/ram.o migration/savevm.o -> ->>-LIBS := $(libs_softmmu) $(LIBS) -> ->>+LIBS := $(libs_softmmu) $(LIBS) -lplumb -> ->> -> ->> # xen support -> ->> obj-$(CONFIG_XEN) += xen-common.o -> ->>diff --git a/migration/ram.c b/migration/ram.c -> ->>index 1eb155a..3b7a09d 100644 -> ->>--- a/migration/ram.c -> ->>+++ b/migration/ram.c -> ->>@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int -> ->>version_id) -> ->> } -> ->> -> ->> rcu_read_unlock(); -> ->>- DPRINTF("Completed load of VM with exit code %d seq iteration " -> ->>+ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " -> ->> "%" PRIu64 "\n", ret, seq_iter); -> ->> return ret; -> ->> } -> ->>diff --git a/migration/savevm.c b/migration/savevm.c -> ->>index 0ad1b93..3feaa61 100644 -> ->>--- a/migration/savevm.c -> ->>+++ b/migration/savevm.c -> ->>@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) -> ->> -> ->> } -> ->> -> ->>+#include "exec/ram_addr.h" -> ->>+#include "qemu/rcu_queue.h" -> ->>+#include -> ->>+#ifndef MD5_DIGEST_LENGTH -> ->>+#define MD5_DIGEST_LENGTH 16 -> ->>+#endif -> ->>+ -> ->>+static void check_host_md5(void) -> ->>+{ -> ->>+ int i; -> ->>+ unsigned char md[MD5_DIGEST_LENGTH]; -> ->>+ rcu_read_lock(); -> ->>+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check -> ->>'pc.ram' block */ -> ->>+ rcu_read_unlock(); -> ->>+ -> ->>+ MD5(block->host, block->used_length, md); -> ->>+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { -> ->>+ fprintf(stderr, "%02x", md[i]); -> ->>+ } -> ->>+ fprintf(stderr, "\n"); -> ->>+ error_report("end ram md5"); -> ->>+} -> ->>+ -> ->> void qemu_savevm_state_begin(QEMUFile *f, -> ->> const MigrationParams *params) -> ->> { -> ->>@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, -> ->>bool iterable_only) -> ->> save_section_header(f, se, QEMU_VM_SECTION_END); -> ->> -> ->> ret = se->ops->save_live_complete_precopy(f, se->opaque); -> ->>+ -> ->>+ fprintf(stderr, "after saving %s complete\n", se->idstr); -> ->>+ check_host_md5(); -> ->>+ -> ->> trace_savevm_section_end(se->idstr, se->section_id, ret); -> ->> save_section_footer(f, se); -> ->> if (ret < 0) { -> ->>@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, -> ->>MigrationIncomingState *mis) -> ->> section_id, le->se->idstr); -> ->> return ret; -> ->> } -> ->>+ if (section_type == QEMU_VM_SECTION_END) { -> ->>+ error_report("after loading state section id %d(%s)", -> ->>+ section_id, le->se->idstr); -> ->>+ check_host_md5(); -> ->>+ } -> ->> if (!check_section_footer(f, le)) { -> ->> return -EINVAL; -> ->> } -> ->>@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) -> ->> } -> ->> -> ->> cpu_synchronize_all_post_init(); -> ->>+ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); -> ->>+ check_host_md5(); -> ->> -> ->> return ret; -> ->> } -> ->> -> ->> -> ->> -> ->-- -> ->Dr. David Alan Gilbert / address@hidden / Manchester, UK -> -> -> -> -> ->. -> -> -> -> --- -> -Best regards. -> -Li Zhijian (8555) -> -> --- -Dr. 
David Alan Gilbert / address@hidden / Manchester, UK - -Li Zhijian wrote: -> -Hi all, -> -> -Does anyboday remember the similar issue post by hailiang months ago -> -http://patchwork.ozlabs.org/patch/454322/ -> -At least tow bugs about migration had been fixed since that. -> -> -And now we found the same issue at the tcg vm(kvm is fine), after -> -migration, the content VM's memory is inconsistent. -> -> -we add a patch to check memory content, you can find it from affix -> -> -steps to reporduce: -> -1) apply the patch and re-build qemu -> -2) prepare the ubuntu guest and run memtest in grub. -> -soruce side: -> -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -> -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -> -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -> -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -> --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -> -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -> -pc-i440fx-2.3,accel=tcg,usb=off -> -> -destination side: -> -x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device -> -e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive -> -if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device -> -virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -> --vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp -> -tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine -> -pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 -> -> -3) start migration -> -with 1000M NIC, migration will finish within 3 min. -> -> -at source: -> -(qemu) migrate tcp:192.168.2.66:8881 -> -after saving ram complete -> -e9e725df678d392b1a83b3a917f332bb -> -qemu-system-x86_64: end ram md5 -> -(qemu) -> -> -at destination: -> -...skip... -> -Completed load of VM with exit code 0 seq iteration 1264 -> -Completed load of VM with exit code 0 seq iteration 1265 -> -Completed load of VM with exit code 0 seq iteration 1266 -> -qemu-system-x86_64: after loading state section id 2(ram) -> -49c2dac7bde0e5e22db7280dcb3824f9 -> -qemu-system-x86_64: end ram md5 -> -qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init -> -> -49c2dac7bde0e5e22db7280dcb3824f9 -> -qemu-system-x86_64: end ram md5 -> -> -This occurs occasionally and only at tcg machine. It seems that -> -some pages dirtied in source side don't transferred to destination. -> -This problem can be reproduced even if we disable virtio. -> -> -Is it OK for some pages that not transferred to destination when do -> -migration ? Or is it a bug? -> -> -Any idea... -Thanks for describing how to reproduce the bug. -If some pages are not transferred to destination then it is a bug, so we -need to know what the problem is, notice that the problem can be that -TCG is not marking dirty some page, that Migration code "forgets" about -that page, or anything eles altogether, that is what we need to find. - -There are more posibilities, I am not sure that memtest is on 32bit -mode, and it is inside posibility that we are missing some state when we -are on real mode. - -Will try to take a look at this. - -THanks, again. 
- - -> -> -=================md5 check patch============================= -> -> -diff --git a/Makefile.target b/Makefile.target -> -index 962d004..e2cb8e9 100644 -> ---- a/Makefile.target -> -+++ b/Makefile.target -> -@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o -> -obj-y += memory_mapping.o -> -obj-y += dump.o -> -obj-y += migration/ram.o migration/savevm.o -> --LIBS := $(libs_softmmu) $(LIBS) -> -+LIBS := $(libs_softmmu) $(LIBS) -lplumb -> -> -# xen support -> -obj-$(CONFIG_XEN) += xen-common.o -> -diff --git a/migration/ram.c b/migration/ram.c -> -index 1eb155a..3b7a09d 100644 -> ---- a/migration/ram.c -> -+++ b/migration/ram.c -> -@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, -> -int version_id) -> -} -> -> -rcu_read_unlock(); -> -- DPRINTF("Completed load of VM with exit code %d seq iteration " -> -+ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " -> -"%" PRIu64 "\n", ret, seq_iter); -> -return ret; -> -} -> -diff --git a/migration/savevm.c b/migration/savevm.c -> -index 0ad1b93..3feaa61 100644 -> ---- a/migration/savevm.c -> -+++ b/migration/savevm.c -> -@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) -> -> -} -> -> -+#include "exec/ram_addr.h" -> -+#include "qemu/rcu_queue.h" -> -+#include -> -+#ifndef MD5_DIGEST_LENGTH -> -+#define MD5_DIGEST_LENGTH 16 -> -+#endif -> -+ -> -+static void check_host_md5(void) -> -+{ -> -+ int i; -> -+ unsigned char md[MD5_DIGEST_LENGTH]; -> -+ rcu_read_lock(); -> -+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check -> -'pc.ram' block */ -> -+ rcu_read_unlock(); -> -+ -> -+ MD5(block->host, block->used_length, md); -> -+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { -> -+ fprintf(stderr, "%02x", md[i]); -> -+ } -> -+ fprintf(stderr, "\n"); -> -+ error_report("end ram md5"); -> -+} -> -+ -> -void qemu_savevm_state_begin(QEMUFile *f, -> -const MigrationParams *params) -> -{ -> -@@ -1056,6 +1079,10 @@ void -> -qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) -> -save_section_header(f, se, QEMU_VM_SECTION_END); -> -> -ret = se->ops->save_live_complete_precopy(f, se->opaque); -> -+ -> -+ fprintf(stderr, "after saving %s complete\n", se->idstr); -> -+ check_host_md5(); -> -+ -> -trace_savevm_section_end(se->idstr, se->section_id, ret); -> -save_section_footer(f, se); -> -if (ret < 0) { -> -@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, -> -MigrationIncomingState *mis) -> -section_id, le->se->idstr); -> -return ret; -> -} -> -+ if (section_type == QEMU_VM_SECTION_END) { -> -+ error_report("after loading state section id %d(%s)", -> -+ section_id, le->se->idstr); -> -+ check_host_md5(); -> -+ } -> -if (!check_section_footer(f, le)) { -> -return -EINVAL; -> -} -> -@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) -> -} -> -> -cpu_synchronize_all_post_init(); -> -+ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); -> -+ check_host_md5(); -> -> -return ret; -> -} - -> -> -Thanks for describing how to reproduce the bug. -> -If some pages are not transferred to destination then it is a bug, so we need -> -to know what the problem is, notice that the problem can be that TCG is not -> -marking dirty some page, that Migration code "forgets" about that page, or -> -anything eles altogether, that is what we need to find. -> -> -There are more posibilities, I am not sure that memtest is on 32bit mode, and -> -it is inside posibility that we are missing some state when we are on real -> -mode. 
-> -> -Will try to take a look at this. -> -> -THanks, again. -> -Hi Juan & Amit - - Do you think we should add a mechanism to check the data integrity during LM -like Zhijian's patch did? it may be very helpful for developers. - Actually, I did the similar thing before in order to make sure that I did the -right thing we I change the code related to LM. - -Liang - -On (Fri) 04 Dec 2015 [01:43:07], Li, Liang Z wrote: -> -> -> -> Thanks for describing how to reproduce the bug. -> -> If some pages are not transferred to destination then it is a bug, so we -> -> need -> -> to know what the problem is, notice that the problem can be that TCG is not -> -> marking dirty some page, that Migration code "forgets" about that page, or -> -> anything eles altogether, that is what we need to find. -> -> -> -> There are more posibilities, I am not sure that memtest is on 32bit mode, -> -> and -> -> it is inside posibility that we are missing some state when we are on real -> -> mode. -> -> -> -> Will try to take a look at this. -> -> -> -> THanks, again. -> -> -> -> -Hi Juan & Amit -> -> -Do you think we should add a mechanism to check the data integrity during LM -> -like Zhijian's patch did? it may be very helpful for developers. -> -Actually, I did the similar thing before in order to make sure that I did -> -the right thing we I change the code related to LM. -If you mean for debugging, something that's not always on, then I'm -fine with it. - -A script that goes along that shows the result of comparison of the -diff will be helpful too, something that shows how many pages are -differnt, how many bytes in a page on average, and so on. - - Amit - diff --git a/classification_output/01/mistranslation/14887122 b/classification_output/01/mistranslation/14887122 new file mode 100644 index 000000000..f13db3b86 --- /dev/null +++ b/classification_output/01/mistranslation/14887122 @@ -0,0 +1,258 @@ +mistranslation: 0.930 +semantic: 0.928 +instruction: 0.905 +other: 0.890 + +[BUG][RFC] CPR transfer Issues: Socket permissions and PID files + +Hello, + +While testing CPR transfer I encountered two issues. The first is that the +transfer fails when running with pidfiles due to the destination qemu process +attempting to create the pidfile while it is still locked by the source +process. The second is that the transfer fails when running with the -run-with +user=$USERID parameter. This is because the destination qemu process creates +the UNIX sockets used for the CPR transfer before dropping to the lower +permissioned user, which causes them to be owned by the original user. The +source qemu process then does not have permission to connect to it because it +is already running as the lesser permissioned user. + +Reproducing the first issue: + +Create a source and destination qemu instance associated with the same VM where +both processes have the -pidfile parameter passed on the command line. You +should see the following error on the command line of the second process: + +qemu-system-x86_64: cannot create PID file: Cannot lock pid file: Resource +temporarily unavailable + +Reproducing the second issue: + +Create a source and destination qemu instance associated with the same VM where +both processes have -run-with user=$USERID passed on the command line, where +$USERID is a different user from the one launching the processes. Then attempt +a CPR transfer using UNIX sockets for the main and cpr sockets. 
You should +receive the following error via QMP: +{"error": {"class": "GenericError", "desc": "Failed to connect to 'cpr.sock': +Permission denied"}} + +I provided a minimal patch that works around the second issue. + +Thank you, +Ben Chaney + +--- +include/system/os-posix.h | 4 ++++ +os-posix.c | 8 -------- +util/qemu-sockets.c | 21 +++++++++++++++++++++ +3 files changed, 25 insertions(+), 8 deletions(-) + +diff --git a/include/system/os-posix.h b/include/system/os-posix.h +index ce5b3bccf8..2a414a914a 100644 +--- a/include/system/os-posix.h ++++ b/include/system/os-posix.h +@@ -55,6 +55,10 @@ void os_setup_limits(void); +void os_setup_post(void); +int os_mlock(bool on_fault); + ++extern struct passwd *user_pwd; ++extern uid_t user_uid; ++extern gid_t user_gid; ++ +/** +* qemu_alloc_stack: +* @sz: pointer to a size_t holding the requested usable stack size +diff --git a/os-posix.c b/os-posix.c +index 52925c23d3..9369b312a0 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -86,14 +86,6 @@ void os_set_proc_name(const char *s) +} + + +-/* +- * Must set all three of these at once. +- * Legal combinations are unset by name by uid +- */ +-static struct passwd *user_pwd; /* NULL non-NULL NULL */ +-static uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ +-static gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ +- +/* +* Prepare to change user ID. user_id can be one of 3 forms: +* - a username, in which case user ID will be changed to its uid, +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 77477c1cd5..987977ead9 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -871,6 +871,14 @@ static bool saddr_is_tight(UnixSocketAddress *saddr) +#endif +} + ++/* ++ * Must set all three of these at once. ++ * Legal combinations are unset by name by uid ++ */ ++struct passwd *user_pwd; /* NULL non-NULL NULL */ ++uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ ++gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ ++ +static int unix_listen_saddr(UnixSocketAddress *saddr, +int num, +Error **errp) +@@ -947,6 +955,19 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, +error_setg_errno(errp, errno, "Failed to bind socket to %s", path); +goto err; +} ++ if (user_pwd) { ++ if (chown(un.sun_path, user_pwd->pw_uid, user_pwd->pw_gid) < 0) { ++ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", +path); ++ goto err; ++ } ++ } ++ else if (user_uid != -1 && user_gid != -1) { ++ if (chown(un.sun_path, user_uid, user_gid) < 0) { ++ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", +path); ++ goto err; ++ } ++ } ++ +if (listen(sock, num) < 0) { +error_setg_errno(errp, errno, "Failed to listen on socket"); +goto err; +-- +2.40.1 + +Thank you Ben. I appreciate you testing CPR and shaking out the bugs. +I will study these and propose patches. + +My initial reaction to the pidfile issue is that the orchestration layer must +pass a different filename when starting the destination qemu instance. When +using live update without containers, these types of resource conflicts in the +global namespaces are a known issue. + +- Steve + +On 3/14/2025 2:33 PM, Chaney, Ben wrote: +Hello, + +While testing CPR transfer I encountered two issues. The first is that the +transfer fails when running with pidfiles due to the destination qemu process +attempting to create the pidfile while it is still locked by the source +process. The second is that the transfer fails when running with the -run-with +user=$USERID parameter. 
This is because the destination qemu process creates +the UNIX sockets used for the CPR transfer before dropping to the lower +permissioned user, which causes them to be owned by the original user. The +source qemu process then does not have permission to connect to it because it +is already running as the lesser permissioned user. + +Reproducing the first issue: + +Create a source and destination qemu instance associated with the same VM where +both processes have the -pidfile parameter passed on the command line. You +should see the following error on the command line of the second process: + +qemu-system-x86_64: cannot create PID file: Cannot lock pid file: Resource +temporarily unavailable + +Reproducing the second issue: + +Create a source and destination qemu instance associated with the same VM where +both processes have -run-with user=$USERID passed on the command line, where +$USERID is a different user from the one launching the processes. Then attempt +a CPR transfer using UNIX sockets for the main and cpr sockets. You should +receive the following error via QMP: +{"error": {"class": "GenericError", "desc": "Failed to connect to 'cpr.sock': +Permission denied"}} + +I provided a minimal patch that works around the second issue. + +Thank you, +Ben Chaney + +--- +include/system/os-posix.h | 4 ++++ +os-posix.c | 8 -------- +util/qemu-sockets.c | 21 +++++++++++++++++++++ +3 files changed, 25 insertions(+), 8 deletions(-) + +diff --git a/include/system/os-posix.h b/include/system/os-posix.h +index ce5b3bccf8..2a414a914a 100644 +--- a/include/system/os-posix.h ++++ b/include/system/os-posix.h +@@ -55,6 +55,10 @@ void os_setup_limits(void); +void os_setup_post(void); +int os_mlock(bool on_fault); + ++extern struct passwd *user_pwd; ++extern uid_t user_uid; ++extern gid_t user_gid; ++ +/** +* qemu_alloc_stack: +* @sz: pointer to a size_t holding the requested usable stack size +diff --git a/os-posix.c b/os-posix.c +index 52925c23d3..9369b312a0 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -86,14 +86,6 @@ void os_set_proc_name(const char *s) +} + + +-/* +- * Must set all three of these at once. +- * Legal combinations are unset by name by uid +- */ +-static struct passwd *user_pwd; /* NULL non-NULL NULL */ +-static uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ +-static gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ +- +/* +* Prepare to change user ID. user_id can be one of 3 forms: +* - a username, in which case user ID will be changed to its uid, +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 77477c1cd5..987977ead9 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -871,6 +871,14 @@ static bool saddr_is_tight(UnixSocketAddress *saddr) +#endif +} + ++/* ++ * Must set all three of these at once. 
++ * Legal combinations are unset by name by uid ++ */ ++struct passwd *user_pwd; /* NULL non-NULL NULL */ ++uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ ++gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ ++ +static int unix_listen_saddr(UnixSocketAddress *saddr, +int num, +Error **errp) +@@ -947,6 +955,19 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, +error_setg_errno(errp, errno, "Failed to bind socket to %s", path); +goto err; +} ++ if (user_pwd) { ++ if (chown(un.sun_path, user_pwd->pw_uid, user_pwd->pw_gid) < 0) { ++ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", +path); ++ goto err; ++ } ++ } ++ else if (user_uid != -1 && user_gid != -1) { ++ if (chown(un.sun_path, user_uid, user_gid) < 0) { ++ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", +path); ++ goto err; ++ } ++ } ++ +if (listen(sock, num) < 0) { +error_setg_errno(errp, errno, "Failed to listen on socket"); +goto err; +-- +2.40.1 + diff --git a/classification_output/01/mistranslation/1693040 b/classification_output/01/mistranslation/1693040 deleted file mode 100644 index 67353acda..000000000 --- a/classification_output/01/mistranslation/1693040 +++ /dev/null @@ -1,1061 +0,0 @@ -mistranslation: 0.862 -semantic: 0.858 -instruction: 0.856 -other: 0.852 - -[Qemu-devel] 答复: Re: 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang - -hi: - -yes.it is better. - -And should we delete - - - - -#ifdef WIN32 - - QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL) - -#endif - - - - -in qio_channel_socket_accept? - -qio_channel_socket_new already have it. - - - - - - - - - - - - -原始邮件 - - - -发件人: address@hidden -收件人:王广10165992 -抄送人: address@hidden address@hidden address@hidden address@hidden -日 期 :2017å¹´03月22日 15:03 -主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang - - - - - -Hi, - -On 2017/3/22 9:42, address@hidden wrote: -> diff --git a/migration/socket.c b/migration/socket.c -> -> -> index 13966f1..d65a0ea 100644 -> -> -> --- a/migration/socket.c -> -> -> +++ b/migration/socket.c -> -> -> @@ -147,8 +147,9 @@ static gboolean -socket_accept_incoming_migration(QIOChannel *ioc, -> -> -> } -> -> -> -> -> -> trace_migration_socket_incoming_accepted() -> -> -> -> -> -> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") -> -> -> + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) -> -> -> migration_channel_process_incoming(migrate_get_current(), -> -> -> QIO_CHANNEL(sioc)) -> -> -> object_unref(OBJECT(sioc)) -> -> -> -> -> Is this patch ok? -> - -Yes, i think this works, but a better way maybe to call -qio_channel_set_feature() -in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the -socket accept fd, -Or fix it by this: - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index f546c68..ce6894c 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, - Error **errp) - { - QIOChannelSocket *cioc -- -- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)) -- cioc->fd = -1 -+ -+ cioc = qio_channel_socket_new() - cioc->remoteAddrLen = sizeof(ioc->remoteAddr) - cioc->localAddrLen = sizeof(ioc->localAddr) - - -Thanks, -Hailiang - -> I have test it . The test could not hang any more. 
-> -> -> -> -> -> -> -> -> -> -> -> -> 原始邮件 -> -> -> -> 发件人: address@hidden -> 收件人: address@hidden address@hidden -> 抄送人: address@hidden address@hidden address@hidden -> 日 期 :2017å¹´03月22日 09:11 -> 主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang -> -> -> -> -> -> On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: -> > * Hailiang Zhang (address@hidden) wrote: -> >> Hi, -> >> -> >> Thanks for reporting this, and i confirmed it in my test, and it is a bug. -> >> -> >> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in -> >> case COLO thread/incoming thread is stuck in read/write() while do -failover, -> >> but it didn't take effect, because all the fd used by COLO (also migration) -> >> has been wrapped by qio channel, and it will not call the shutdown API if -> >> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN). -> >> -> >> Cc: Dr. David Alan Gilbert address@hidden -> >> -> >> I doubted migration cancel has the same problem, it may be stuck in write() -> >> if we tried to cancel migration. -> >> -> >> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, -Error **errp) -> >> { -> >> qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") -> >> migration_channel_connect(s, ioc, NULL) -> >> ... ... -> >> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) above, -> >> and the -> >> migrate_fd_cancel() -> >> { -> >> ... ... -> >> if (s->state == MIGRATION_STATUS_CANCELLING && f) { -> >> qemu_file_shutdown(f) --> This will not take effect. No ? -> >> } -> >> } -> > -> > (cc'd in Daniel Berrange). -> > I see that we call qio_channel_set_feature(ioc, -QIO_CHANNEL_FEATURE_SHUTDOWN) at the -> > top of qio_channel_socket_new so I think that's safe isn't it? -> > -> -> Hmm, you are right, this problem is only exist for the migration incoming fd, -thanks. -> -> > Dave -> > -> >> Thanks, -> >> Hailiang -> >> -> >> On 2017/3/21 16:10, address@hidden wrote: -> >>> Thank you。 -> >>> -> >>> I have test aready。 -> >>> -> >>> When the Primary Node panic,the Secondary Node qemu hang at the same -place。 -> >>> -> >>> Incorrding -http://wiki.qemu-project.org/Features/COLO -,kill Primary Node -qemu will not produce the problem,but Primary Node panic can。 -> >>> -> >>> I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. -> >>> -> >>> -> >>> when failover,channel_shutdown could not shut down the channel. -> >>> -> >>> -> >>> so the colo_process_incoming_thread will hang at recvmsg. -> >>> -> >>> -> >>> I test a patch: -> >>> -> >>> -> >>> diff --git a/migration/socket.c b/migration/socket.c -> >>> -> >>> -> >>> index 13966f1..d65a0ea 100644 -> >>> -> >>> -> >>> --- a/migration/socket.c -> >>> -> >>> -> >>> +++ b/migration/socket.c -> >>> -> >>> -> >>> @@ -147,8 +147,9 @@ static gboolean -socket_accept_incoming_migration(QIOChannel *ioc, -> >>> -> >>> -> >>> } -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> trace_migration_socket_incoming_accepted() -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> qio_channel_set_name(QIO_CHANNEL(sioc), -"migration-socket-incoming") -> >>> -> >>> -> >>> + qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) -> >>> -> >>> -> >>> migration_channel_process_incoming(migrate_get_current(), -> >>> -> >>> -> >>> QIO_CHANNEL(sioc)) -> >>> -> >>> -> >>> object_unref(OBJECT(sioc)) -> >>> -> >>> -> >>> -> >>> -> >>> My test will not hang any more. 
-> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> 原始邮件 -> >>> -> >>> -> >>> -> >>> 发件人: address@hidden -> >>> 收件人:王广10165992 address@hidden -> >>> 抄送人: address@hidden address@hidden -> >>> 日 期 :2017å¹´03月21日 15:58 -> >>> 主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> Hi,Wang. -> >>> -> >>> You can test this branch: -> >>> -> >>> -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -> >>> -> >>> and please follow wiki ensure your own configuration correctly. -> >>> -> >>> -http://wiki.qemu-project.org/Features/COLO -> >>> -> >>> -> >>> Thanks -> >>> -> >>> Zhang Chen -> >>> -> >>> -> >>> On 03/21/2017 03:27 PM, address@hidden wrote: -> >>> > -> >>> > hi. -> >>> > -> >>> > I test the git qemu master have the same problem. -> >>> > -> >>> > (gdb) bt -> >>> > -> >>> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -> >>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -> >>> > -> >>> > #1 0x00007f658e4aa0c2 in qio_channel_read -> >>> > (address@hidden, address@hidden "", -> >>> > address@hidden, address@hidden) at io/channel.c:114 -> >>> > -> >>> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, -> >>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at -> >>> > migration/qemu-file-channel.c:78 -> >>> > -> >>> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -> >>> > migration/qemu-file.c:295 -> >>> > -> >>> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -> >>> > address@hidden) at migration/qemu-file.c:555 -> >>> > -> >>> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -> >>> > migration/qemu-file.c:568 -> >>> > -> >>> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -> >>> > migration/qemu-file.c:648 -> >>> > -> >>> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -> >>> > address@hidden) at migration/colo.c:244 -> >>> > -> >>> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized -> >>> > out>, address@hidden, -> >>> > address@hidden) -> >>> > -> >>> > at migration/colo.c:264 -> >>> > -> >>> > #9 0x00007f658e3e740e in colo_process_incoming_thread -> >>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 -> >>> > -> >>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -> >>> > -> >>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -> >>> > -> >>> > (gdb) p ioc->name -> >>> > -> >>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -> >>> > -> >>> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -> >>> > -> >>> > $3 = 0 -> >>> > -> >>> > -> >>> > (gdb) bt -> >>> > -> >>> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -> >>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -> >>> > -> >>> > #1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at -> >>> > gmain.c:3054 -> >>> > -> >>> > #2 g_main_context_dispatch (context=<optimized out>, -> >>> > address@hidden) at gmain.c:3630 -> >>> > -> >>> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -> >>> > -> >>> > #4 os_host_main_loop_wait (timeout=<optimized out>) at -> >>> > util/main-loop.c:258 -> >>> > -> >>> > #5 main_loop_wait (address@hidden) at -> >>> > util/main-loop.c:506 -> >>> > -> >>> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -> >>> > -> >>> > #7 main (argc=<optimized out>, 
argv=<optimized out>, envp=<optimized -> >>> > out>) at vl.c:4709 -> >>> > -> >>> > (gdb) p ioc->features -> >>> > -> >>> > $1 = 6 -> >>> > -> >>> > (gdb) p ioc->name -> >>> > -> >>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -> >>> > -> >>> > -> >>> > May be socket_accept_incoming_migration should -> >>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -> >>> > -> >>> > -> >>> > thank you. -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > 原始邮件 -> >>> > address@hidden -> >>> > address@hidden -> >>> > address@hidden@huawei.com> -> >>> > *日 期 :*2017å¹´03月16日 14:46 -> >>> > *主 题 :**Re: [Qemu-devel] COLO failover hang* -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > On 03/15/2017 05:06 PM, wangguang wrote: -> >>> > > am testing QEMU COLO feature described here [QEMU -> >>> > > Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -> >>> > > -> >>> > > When the Primary Node panic,the Secondary Node qemu hang. -> >>> > > hang at recvmsg in qio_channel_socket_readv. -> >>> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": -> >>> > > "x-colo-lost-heartbeat" } in Secondary VM's -> >>> > > monitor,the Secondary Node qemu still hang at recvmsg . -> >>> > > -> >>> > > I found that the colo in qemu is not complete yet. -> >>> > > Do the colo have any plan for development? -> >>> > -> >>> > Yes, We are developing. You can see some of patch we pushing. -> >>> > -> >>> > > Has anyone ever run it successfully? Any help is appreciated! -> >>> > -> >>> > In our internal version can run it successfully, -> >>> > The failover detail you can ask Zhanghailiang for help. -> >>> > Next time if you have some question about COLO, -> >>> > please cc me and zhanghailiang address@hidden -> >>> > -> >>> > -> >>> > Thanks -> >>> > Zhang Chen -> >>> > -> >>> > -> >>> > > -> >>> > > -> >>> > > -> >>> > > centos7.2+qemu2.7.50 -> >>> > > (gdb) bt -> >>> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -> >>> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized -out>, -> >>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, -errp=0x0) at -> >>> > > io/channel-socket.c:497 -> >>> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -> >>> > > address@hidden "", address@hidden, -> >>> > > address@hidden) at io/channel.c:97 -> >>> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, -> >>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at -> >>> > > migration/qemu-file-channel.c:78 -> >>> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -> >>> > > migration/qemu-file.c:257 -> >>> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -> >>> > > address@hidden) at migration/qemu-file.c:510 -> >>> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -> >>> > > migration/qemu-file.c:523 -> >>> > > #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -> >>> > > migration/qemu-file.c:603 -> >>> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -> >>> > > address@hidden) at migration/colo.c:215 -> >>> > > #9 0x00007f3e0327250d in colo_wait_handle_message -(errp=0x7f3d62bfaa48, -> >>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at -> >>> > > migration/colo.c:546 -> >>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -> >>> > > migration/colo.c:649 -> >>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -> >>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 -> >>> > > -> 
>>> > > -> >>> > > -> >>> > > -> >>> > > -> >>> > > -- -> >>> > > View this message in context: -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -> >>> > > Sent from the Developer mailing list archive at Nabble.com. -> >>> > > -> >>> > > -> >>> > > -> >>> > > -> >>> > -> >>> > -- -> >>> > Thanks -> >>> > Zhang Chen -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > -> >>> -> >> -> > -- -> > Dr. David Alan Gilbert / address@hidden / Manchester, UK -> > -> > . -> > -> - -On 2017/3/22 16:09, address@hidden wrote: -hi: - -yes.it is better. - -And should we delete -Yes, you are right. -#ifdef WIN32 - - QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL) - -#endif - - - - -in qio_channel_socket_accept? - -qio_channel_socket_new already have it. - - - - - - - - - - - - -原始邮件 - - - -发件人: address@hidden -收件人:王广10165992 -抄送人: address@hidden address@hidden address@hidden address@hidden -日 期 :2017å¹´03月22日 15:03 -主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang - - - - - -Hi, - -On 2017/3/22 9:42, address@hidden wrote: -> diff --git a/migration/socket.c b/migration/socket.c -> -> -> index 13966f1..d65a0ea 100644 -> -> -> --- a/migration/socket.c -> -> -> +++ b/migration/socket.c -> -> -> @@ -147,8 +147,9 @@ static gboolean -socket_accept_incoming_migration(QIOChannel *ioc, -> -> -> } -> -> -> -> -> -> trace_migration_socket_incoming_accepted() -> -> -> -> -> -> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") -> -> -> + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) -> -> -> migration_channel_process_incoming(migrate_get_current(), -> -> -> QIO_CHANNEL(sioc)) -> -> -> object_unref(OBJECT(sioc)) -> -> -> -> -> Is this patch ok? -> - -Yes, i think this works, but a better way maybe to call -qio_channel_set_feature() -in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the -socket accept fd, -Or fix it by this: - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index f546c68..ce6894c 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, - Error **errp) - { - QIOChannelSocket *cioc -- -- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)) -- cioc->fd = -1 -+ -+ cioc = qio_channel_socket_new() - cioc->remoteAddrLen = sizeof(ioc->remoteAddr) - cioc->localAddrLen = sizeof(ioc->localAddr) - - -Thanks, -Hailiang - -> I have test it . The test could not hang any more. -> -> -> -> -> -> -> -> -> -> -> -> -> 原始邮件 -> -> -> -> 发件人: address@hidden -> 收件人: address@hidden address@hidden -> 抄送人: address@hidden address@hidden address@hidden -> 日 期 :2017å¹´03月22日 09:11 -> 主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang -> -> -> -> -> -> On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: -> > * Hailiang Zhang (address@hidden) wrote: -> >> Hi, -> >> -> >> Thanks for reporting this, and i confirmed it in my test, and it is a bug. -> >> -> >> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in -> >> case COLO thread/incoming thread is stuck in read/write() while do -failover, -> >> but it didn't take effect, because all the fd used by COLO (also migration) -> >> has been wrapped by qio channel, and it will not call the shutdown API if -> >> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN). -> >> -> >> Cc: Dr. 
David Alan Gilbert address@hidden -> >> -> >> I doubted migration cancel has the same problem, it may be stuck in write() -> >> if we tried to cancel migration. -> >> -> >> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, -Error **errp) -> >> { -> >> qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") -> >> migration_channel_connect(s, ioc, NULL) -> >> ... ... -> >> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) above, -> >> and the -> >> migrate_fd_cancel() -> >> { -> >> ... ... -> >> if (s->state == MIGRATION_STATUS_CANCELLING && f) { -> >> qemu_file_shutdown(f) --> This will not take effect. No ? -> >> } -> >> } -> > -> > (cc'd in Daniel Berrange). -> > I see that we call qio_channel_set_feature(ioc, -QIO_CHANNEL_FEATURE_SHUTDOWN) at the -> > top of qio_channel_socket_new so I think that's safe isn't it? -> > -> -> Hmm, you are right, this problem is only exist for the migration incoming fd, -thanks. -> -> > Dave -> > -> >> Thanks, -> >> Hailiang -> >> -> >> On 2017/3/21 16:10, address@hidden wrote: -> >>> Thank you。 -> >>> -> >>> I have test aready。 -> >>> -> >>> When the Primary Node panic,the Secondary Node qemu hang at the same -place。 -> >>> -> >>> Incorrding -http://wiki.qemu-project.org/Features/COLO -,kill Primary Node -qemu will not produce the problem,but Primary Node panic can。 -> >>> -> >>> I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. -> >>> -> >>> -> >>> when failover,channel_shutdown could not shut down the channel. -> >>> -> >>> -> >>> so the colo_process_incoming_thread will hang at recvmsg. -> >>> -> >>> -> >>> I test a patch: -> >>> -> >>> -> >>> diff --git a/migration/socket.c b/migration/socket.c -> >>> -> >>> -> >>> index 13966f1..d65a0ea 100644 -> >>> -> >>> -> >>> --- a/migration/socket.c -> >>> -> >>> -> >>> +++ b/migration/socket.c -> >>> -> >>> -> >>> @@ -147,8 +147,9 @@ static gboolean -socket_accept_incoming_migration(QIOChannel *ioc, -> >>> -> >>> -> >>> } -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> trace_migration_socket_incoming_accepted() -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> qio_channel_set_name(QIO_CHANNEL(sioc), -"migration-socket-incoming") -> >>> -> >>> -> >>> + qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) -> >>> -> >>> -> >>> migration_channel_process_incoming(migrate_get_current(), -> >>> -> >>> -> >>> QIO_CHANNEL(sioc)) -> >>> -> >>> -> >>> object_unref(OBJECT(sioc)) -> >>> -> >>> -> >>> -> >>> -> >>> My test will not hang any more. -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> 原始邮件 -> >>> -> >>> -> >>> -> >>> 发件人: address@hidden -> >>> 收件人:王广10165992 address@hidden -> >>> 抄送人: address@hidden address@hidden -> >>> 日 期 :2017å¹´03月21日 15:58 -> >>> 主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang -> >>> -> >>> -> >>> -> >>> -> >>> -> >>> Hi,Wang. -> >>> -> >>> You can test this branch: -> >>> -> >>> -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -> >>> -> >>> and please follow wiki ensure your own configuration correctly. -> >>> -> >>> -http://wiki.qemu-project.org/Features/COLO -> >>> -> >>> -> >>> Thanks -> >>> -> >>> Zhang Chen -> >>> -> >>> -> >>> On 03/21/2017 03:27 PM, address@hidden wrote: -> >>> > -> >>> > hi. -> >>> > -> >>> > I test the git qemu master have the same problem. 
-> >>> > -> >>> > (gdb) bt -> >>> > -> >>> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -> >>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -> >>> > -> >>> > #1 0x00007f658e4aa0c2 in qio_channel_read -> >>> > (address@hidden, address@hidden "", -> >>> > address@hidden, address@hidden) at io/channel.c:114 -> >>> > -> >>> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, -> >>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at -> >>> > migration/qemu-file-channel.c:78 -> >>> > -> >>> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -> >>> > migration/qemu-file.c:295 -> >>> > -> >>> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -> >>> > address@hidden) at migration/qemu-file.c:555 -> >>> > -> >>> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -> >>> > migration/qemu-file.c:568 -> >>> > -> >>> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -> >>> > migration/qemu-file.c:648 -> >>> > -> >>> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -> >>> > address@hidden) at migration/colo.c:244 -> >>> > -> >>> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized -> >>> > out>, address@hidden, -> >>> > address@hidden) -> >>> > -> >>> > at migration/colo.c:264 -> >>> > -> >>> > #9 0x00007f658e3e740e in colo_process_incoming_thread -> >>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 -> >>> > -> >>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -> >>> > -> >>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -> >>> > -> >>> > (gdb) p ioc->name -> >>> > -> >>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -> >>> > -> >>> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -> >>> > -> >>> > $3 = 0 -> >>> > -> >>> > -> >>> > (gdb) bt -> >>> > -> >>> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -> >>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -> >>> > -> >>> > #1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at -> >>> > gmain.c:3054 -> >>> > -> >>> > #2 g_main_context_dispatch (context=<optimized out>, -> >>> > address@hidden) at gmain.c:3630 -> >>> > -> >>> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -> >>> > -> >>> > #4 os_host_main_loop_wait (timeout=<optimized out>) at -> >>> > util/main-loop.c:258 -> >>> > -> >>> > #5 main_loop_wait (address@hidden) at -> >>> > util/main-loop.c:506 -> >>> > -> >>> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -> >>> > -> >>> > #7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized -> >>> > out>) at vl.c:4709 -> >>> > -> >>> > (gdb) p ioc->features -> >>> > -> >>> > $1 = 6 -> >>> > -> >>> > (gdb) p ioc->name -> >>> > -> >>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -> >>> > -> >>> > -> >>> > May be socket_accept_incoming_migration should -> >>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -> >>> > -> >>> > -> >>> > thank you. -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > 原始邮件 -> >>> > address@hidden -> >>> > address@hidden -> >>> > address@hidden@huawei.com> -> >>> > *日 期 :*2017å¹´03月16日 14:46 -> >>> > *主 题 :**Re: [Qemu-devel] COLO failover hang* -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > On 03/15/2017 05:06 PM, wangguang wrote: -> >>> > > am testing QEMU COLO feature described here [QEMU -> >>> > > Wiki]( -http://wiki.qemu-project.org/Features/COLO -). 
-> >>> > > -> >>> > > When the Primary Node panic,the Secondary Node qemu hang. -> >>> > > hang at recvmsg in qio_channel_socket_readv. -> >>> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": -> >>> > > "x-colo-lost-heartbeat" } in Secondary VM's -> >>> > > monitor,the Secondary Node qemu still hang at recvmsg . -> >>> > > -> >>> > > I found that the colo in qemu is not complete yet. -> >>> > > Do the colo have any plan for development? -> >>> > -> >>> > Yes, We are developing. You can see some of patch we pushing. -> >>> > -> >>> > > Has anyone ever run it successfully? Any help is appreciated! -> >>> > -> >>> > In our internal version can run it successfully, -> >>> > The failover detail you can ask Zhanghailiang for help. -> >>> > Next time if you have some question about COLO, -> >>> > please cc me and zhanghailiang address@hidden -> >>> > -> >>> > -> >>> > Thanks -> >>> > Zhang Chen -> >>> > -> >>> > -> >>> > > -> >>> > > -> >>> > > -> >>> > > centos7.2+qemu2.7.50 -> >>> > > (gdb) bt -> >>> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -> >>> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized -out>, -> >>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, -errp=0x0) at -> >>> > > io/channel-socket.c:497 -> >>> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -> >>> > > address@hidden "", address@hidden, -> >>> > > address@hidden) at io/channel.c:97 -> >>> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, -> >>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at -> >>> > > migration/qemu-file-channel.c:78 -> >>> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -> >>> > > migration/qemu-file.c:257 -> >>> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -> >>> > > address@hidden) at migration/qemu-file.c:510 -> >>> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -> >>> > > migration/qemu-file.c:523 -> >>> > > #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -> >>> > > migration/qemu-file.c:603 -> >>> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -> >>> > > address@hidden) at migration/colo.c:215 -> >>> > > #9 0x00007f3e0327250d in colo_wait_handle_message -(errp=0x7f3d62bfaa48, -> >>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at -> >>> > > migration/colo.c:546 -> >>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -> >>> > > migration/colo.c:649 -> >>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -> >>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 -> >>> > > -> >>> > > -> >>> > > -> >>> > > -> >>> > > -> >>> > > -- -> >>> > > View this message in context: -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -> >>> > > Sent from the Developer mailing list archive at Nabble.com. -> >>> > > -> >>> > > -> >>> > > -> >>> > > -> >>> > -> >>> > -- -> >>> > Thanks -> >>> > Zhang Chen -> >>> > -> >>> > -> >>> > -> >>> > -> >>> > -> >>> -> >> -> > -- -> > Dr. David Alan Gilbert / address@hidden / Manchester, UK -> > -> > . 
-> > -> - diff --git a/classification_output/01/mistranslation/22219210 b/classification_output/01/mistranslation/22219210 new file mode 100644 index 000000000..95c3f61d1 --- /dev/null +++ b/classification_output/01/mistranslation/22219210 @@ -0,0 +1,43 @@ +mistranslation: 0.472 +semantic: 0.387 +other: 0.345 +instruction: 0.261 + +[BUG][CPU hot-plug]CPU hot-plugs cause the qemu process to coredump + +Hello,Recently, when I was developing CPU hot-plugs under the loongarch +architecture, +I found that there was a problem with qemu cpu hot-plugs under x86 +architecture, +which caused the qemu process coredump when repeatedly inserting and +unplugging +the CPU when the TCG was accelerated. + + +The specific operation process is as follows: + +1.Use the following command to start the virtual machine + +qemu-system-x86_64 \ +-machine q35  \ +-cpu Broadwell-IBRS \ +-smp 1,maxcpus=4,sockets=4,cores=1,threads=1 \ +-m 4G \ +-drive file=~/anolis-8.8.qcow2  \ +-serial stdio   \ +-monitor telnet:localhost:4498,server,nowait + + +2.Enter QEMU Monitor via telnet for repeated CPU insertion and unplugging + +telnet 127.0.0.1 4498 +(qemu) device_add +Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1 +(qemu) device_del cpu1 +(qemu) device_add +Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1 +3.You will notice that the QEMU process has a coredump + +# malloc(): unsorted double linked list corrupted +Aborted (core dumped) + diff --git a/classification_output/01/mistranslation/23270873 b/classification_output/01/mistranslation/23270873 new file mode 100644 index 000000000..e4d4789c4 --- /dev/null +++ b/classification_output/01/mistranslation/23270873 @@ -0,0 +1,692 @@ +mistranslation: 0.881 +other: 0.839 +instruction: 0.755 +semantic: 0.752 + +[Qemu-devel] [BUG?] aio_get_linux_aio: Assertion `ctx->linux_aio' failed + +Hi, + +I am seeing some strange QEMU assertion failures for qemu on s390x, +which prevents a guest from starting. + +Git bisecting points to the following commit as the source of the error. + +commit ed6e2161715c527330f936d44af4c547f25f687e +Author: Nishanth Aravamudan +Date: Fri Jun 22 12:37:00 2018 -0700 + + linux-aio: properly bubble up errors from initialization + + laio_init() can fail for a couple of reasons, which will lead to a NULL + pointer dereference in laio_attach_aio_context(). + + To solve this, add a aio_setup_linux_aio() function which is called + early in raw_open_common. If this fails, propagate the error up. The + signature of aio_get_linux_aio() was not modified, because it seems + preferable to return the actual errno from the possible failing + initialization calls. + + Additionally, when the AioContext changes, we need to associate a + LinuxAioState with the new AioContext. Use the bdrv_attach_aio_context + callback and call the new aio_setup_linux_aio(), which will allocate a +new AioContext if needed, and return errors on failures. If it +fails for +any reason, fallback to threaded AIO with an error message, as the + device is already in-use by the guest. + + Add an assert that aio_get_linux_aio() cannot return NULL. + + Signed-off-by: Nishanth Aravamudan + Message-id: address@hidden + Signed-off-by: Stefan Hajnoczi +Not sure what is causing this assertion to fail. 
Here is the qemu +command line of the guest, from qemu log, which throws this error: +LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin +QEMU_AUDIO_DRV=none /usr/local/bin/qemu-system-s390x -name +guest=rt_vm1,debug-threads=on -S -object +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-21-rt_vm1/master-key.aes +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off -m +1024 -realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -object +iothread,id=iothread1 -uuid 0cde16cd-091d-41bd-9ac2-5243df5c9a0d +-display none -no-user-config -nodefaults -chardev +socket,id=charmonitor,fd=28,server,nowait -mon +chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown +-boot strict=on -drive +file=/dev/mapper/360050763998b0883980000002a000031,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on +-netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:3a:c8:67:95:84,devno=fe.0.0000 +-netdev tap,fd=32,id=hostnet1,vhost=on,vhostfd=33 -device +virtio-net-ccw,netdev=hostnet1,id=net1,mac=52:54:00:2a:e5:08,devno=fe.0.0002 +-chardev pty,id=charconsole0 -device +sclpconsole,chardev=charconsole0,id=console0 -device +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -sandbox +on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny +-msg timestamp=on +2018-07-17 15:48:42.252+0000: Domain id=21 is tainted: high-privileges +2018-07-17T15:48:42.279380Z qemu-system-s390x: -chardev +pty,id=charconsole0: char device redirected to /dev/pts/3 (label +charconsole0) +qemu-system-s390x: util/async.c:339: aio_get_linux_aio: Assertion +`ctx->linux_aio' failed. +2018-07-17 15:48:43.309+0000: shutting down, reason=failed + + +Any help debugging this would be greatly appreciated. + +Thank you +Farhan + +On 17.07.2018 [13:25:53 -0400], Farhan Ali wrote: +> +Hi, +> +> +I am seeing some strange QEMU assertion failures for qemu on s390x, +> +which prevents a guest from starting. +> +> +Git bisecting points to the following commit as the source of the error. +> +> +commit ed6e2161715c527330f936d44af4c547f25f687e +> +Author: Nishanth Aravamudan +> +Date: Fri Jun 22 12:37:00 2018 -0700 +> +> +linux-aio: properly bubble up errors from initialization +> +> +laio_init() can fail for a couple of reasons, which will lead to a NULL +> +pointer dereference in laio_attach_aio_context(). +> +> +To solve this, add a aio_setup_linux_aio() function which is called +> +early in raw_open_common. If this fails, propagate the error up. The +> +signature of aio_get_linux_aio() was not modified, because it seems +> +preferable to return the actual errno from the possible failing +> +initialization calls. +> +> +Additionally, when the AioContext changes, we need to associate a +> +LinuxAioState with the new AioContext. Use the bdrv_attach_aio_context +> +callback and call the new aio_setup_linux_aio(), which will allocate a +> +new AioContext if needed, and return errors on failures. If it fails for +> +any reason, fallback to threaded AIO with an error message, as the +> +device is already in-use by the guest. +> +> +Add an assert that aio_get_linux_aio() cannot return NULL. +> +> +Signed-off-by: Nishanth Aravamudan +> +Message-id: address@hidden +> +Signed-off-by: Stefan Hajnoczi +> +> +> +Not sure what is causing this assertion to fail. 
Here is the qemu command +> +line of the guest, from qemu log, which throws this error: +> +> +> +LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin +> +QEMU_AUDIO_DRV=none /usr/local/bin/qemu-system-s390x -name +> +guest=rt_vm1,debug-threads=on -S -object +> +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-21-rt_vm1/master-key.aes +> +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off -m 1024 +> +-realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -object +> +iothread,id=iothread1 -uuid 0cde16cd-091d-41bd-9ac2-5243df5c9a0d -display +> +none -no-user-config -nodefaults -chardev +> +socket,id=charmonitor,fd=28,server,nowait -mon +> +chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot +> +strict=on -drive +> +file=/dev/mapper/360050763998b0883980000002a000031,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +> +-device +> +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on +> +-netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device +> +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:3a:c8:67:95:84,devno=fe.0.0000 +> +-netdev tap,fd=32,id=hostnet1,vhost=on,vhostfd=33 -device +> +virtio-net-ccw,netdev=hostnet1,id=net1,mac=52:54:00:2a:e5:08,devno=fe.0.0002 +> +-chardev pty,id=charconsole0 -device +> +sclpconsole,chardev=charconsole0,id=console0 -device +> +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -sandbox +> +on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg +> +timestamp=on +> +> +> +> +2018-07-17 15:48:42.252+0000: Domain id=21 is tainted: high-privileges +> +2018-07-17T15:48:42.279380Z qemu-system-s390x: -chardev pty,id=charconsole0: +> +char device redirected to /dev/pts/3 (label charconsole0) +> +qemu-system-s390x: util/async.c:339: aio_get_linux_aio: Assertion +> +`ctx->linux_aio' failed. +> +2018-07-17 15:48:43.309+0000: shutting down, reason=failed +> +> +> +Any help debugging this would be greatly appreciated. +iiuc, this possibly implies AIO was not actually used previously on this +guest (it might have silently been falling back to threaded IO?). I +don't have access to s390x, but would it be possible to run qemu under +gdb and see if aio_setup_linux_aio is being called at all (I think it +might not be, but I'm not sure why), and if so, if it's for the context +in question? + +If it's not being called first, could you see what callpath is calling +aio_get_linux_aio when this assertion trips? + +Thanks! +-Nish + +On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +iiuc, this possibly implies AIO was not actually used previously on this +guest (it might have silently been falling back to threaded IO?). I +don't have access to s390x, but would it be possible to run qemu under +gdb and see if aio_setup_linux_aio is being called at all (I think it +might not be, but I'm not sure why), and if so, if it's for the context +in question? + +If it's not being called first, could you see what callpath is calling +aio_get_linux_aio when this assertion trips? + +Thanks! 
+-Nish +Hi Nishant, +From the coredump of the guest this is the call trace that calls +aio_get_linux_aio: +Stack trace of thread 145158: +#0 0x000003ff94dbe274 raise (libc.so.6) +#1 0x000003ff94da39a8 abort (libc.so.6) +#2 0x000003ff94db62ce __assert_fail_base (libc.so.6) +#3 0x000003ff94db634c __assert_fail (libc.so.6) +#4 0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +#5 0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +#6 0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +#7 0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +#8 0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +#9 0x000002aa20db3c34 aio_poll (qemu-system-s390x) +#10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +#11 0x000003ff94f879a8 start_thread (libpthread.so.0) +#12 0x000003ff94e797ee thread_start (libc.so.6) + + +Thanks for taking a look and responding. + +Thanks +Farhan + +On 07/18/2018 09:42 AM, Farhan Ali wrote: +On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +iiuc, this possibly implies AIO was not actually used previously on this +guest (it might have silently been falling back to threaded IO?). I +don't have access to s390x, but would it be possible to run qemu under +gdb and see if aio_setup_linux_aio is being called at all (I think it +might not be, but I'm not sure why), and if so, if it's for the context +in question? + +If it's not being called first, could you see what callpath is calling +aio_get_linux_aio when this assertion trips? + +Thanks! +-Nish +Hi Nishant, +From the coredump of the guest this is the call trace that calls +aio_get_linux_aio: +Stack trace of thread 145158: +#0  0x000003ff94dbe274 raise (libc.so.6) +#1  0x000003ff94da39a8 abort (libc.so.6) +#2  0x000003ff94db62ce __assert_fail_base (libc.so.6) +#3  0x000003ff94db634c __assert_fail (libc.so.6) +#4  0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +#5  0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +#6  0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +#7  0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +#8  0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +#9  0x000002aa20db3c34 aio_poll (qemu-system-s390x) +#10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +#11 0x000003ff94f879a8 start_thread (libpthread.so.0) +#12 0x000003ff94e797ee thread_start (libc.so.6) + + +Thanks for taking a look and responding. + +Thanks +Farhan +Trying to debug a little further, the block device in this case is a +"host device". And looking at your commit carefully you use the +bdrv_attach_aio_context callback to setup a Linux AioContext. +For some reason the "host device" struct (BlockDriver bdrv_host_device +in block/file-posix.c) does not have a bdrv_attach_aio_context defined. +So a simple change of adding the callback to the struct solves the issue +and the guest starts fine. +diff --git a/block/file-posix.c b/block/file-posix.c +index 28824aa..b8d59fb 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { + .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_io_plug = raw_aio_plug, + .bdrv_io_unplug = raw_aio_unplug, ++ .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, +I am not too familiar with block device code in QEMU, so not sure if +this is the right fix or if there are some underlying problems. 
+Thanks +Farhan + +On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: +> +> +> +On 07/18/2018 09:42 AM, Farhan Ali wrote: +> +> +> +> +> +> On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +> +> > iiuc, this possibly implies AIO was not actually used previously on this +> +> > guest (it might have silently been falling back to threaded IO?). I +> +> > don't have access to s390x, but would it be possible to run qemu under +> +> > gdb and see if aio_setup_linux_aio is being called at all (I think it +> +> > might not be, but I'm not sure why), and if so, if it's for the context +> +> > in question? +> +> > +> +> > If it's not being called first, could you see what callpath is calling +> +> > aio_get_linux_aio when this assertion trips? +> +> > +> +> > Thanks! +> +> > -Nish +> +> +> +> +> +> Hi Nishant, +> +> +> +> From the coredump of the guest this is the call trace that calls +> +> aio_get_linux_aio: +> +> +> +> +> +> Stack trace of thread 145158: +> +> #0  0x000003ff94dbe274 raise (libc.so.6) +> +> #1  0x000003ff94da39a8 abort (libc.so.6) +> +> #2  0x000003ff94db62ce __assert_fail_base (libc.so.6) +> +> #3  0x000003ff94db634c __assert_fail (libc.so.6) +> +> #4  0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +> +> #5  0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +> +> #6  0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +> +> #7  0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +> +> #8  0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +> +> #9  0x000002aa20db3c34 aio_poll (qemu-system-s390x) +> +> #10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +> +> #11 0x000003ff94f879a8 start_thread (libpthread.so.0) +> +> #12 0x000003ff94e797ee thread_start (libc.so.6) +> +> +> +> +> +> Thanks for taking a look and responding. +> +> +> +> Thanks +> +> Farhan +> +> +> +> +> +> +> +> +Trying to debug a little further, the block device in this case is a "host +> +device". And looking at your commit carefully you use the +> +bdrv_attach_aio_context callback to setup a Linux AioContext. +> +> +For some reason the "host device" struct (BlockDriver bdrv_host_device in +> +block/file-posix.c) does not have a bdrv_attach_aio_context defined. +> +So a simple change of adding the callback to the struct solves the issue and +> +the guest starts fine. +> +> +> +diff --git a/block/file-posix.c b/block/file-posix.c +> +index 28824aa..b8d59fb 100644 +> +--- a/block/file-posix.c +> ++++ b/block/file-posix.c +> +@@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { +> +.bdrv_refresh_limits = raw_refresh_limits, +> +.bdrv_io_plug = raw_aio_plug, +> +.bdrv_io_unplug = raw_aio_unplug, +> ++ .bdrv_attach_aio_context = raw_aio_attach_aio_context, +> +> +.bdrv_co_truncate = raw_co_truncate, +> +.bdrv_getlength = raw_getlength, +> +> +> +> +I am not too familiar with block device code in QEMU, so not sure if +> +this is the right fix or if there are some underlying problems. +Oh this is quite embarassing! I only added the bdrv_attach_aio_context +callback for the file-backed device. Your fix is definitely corect for +host device. Let me make sure there weren't any others missed and I will +send out a properly formatted patch. Thank you for the quick testing and +turnaround! 
+ +-Nish + +On 07/18/2018 08:52 PM, Nishanth Aravamudan wrote: +> +On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: +> +> +> +> +> +> On 07/18/2018 09:42 AM, Farhan Ali wrote: +> +>> +> +>> +> +>> On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +> +>>> iiuc, this possibly implies AIO was not actually used previously on this +> +>>> guest (it might have silently been falling back to threaded IO?). I +> +>>> don't have access to s390x, but would it be possible to run qemu under +> +>>> gdb and see if aio_setup_linux_aio is being called at all (I think it +> +>>> might not be, but I'm not sure why), and if so, if it's for the context +> +>>> in question? +> +>>> +> +>>> If it's not being called first, could you see what callpath is calling +> +>>> aio_get_linux_aio when this assertion trips? +> +>>> +> +>>> Thanks! +> +>>> -Nish +> +>> +> +>> +> +>> Hi Nishant, +> +>> +> +>> From the coredump of the guest this is the call trace that calls +> +>> aio_get_linux_aio: +> +>> +> +>> +> +>> Stack trace of thread 145158: +> +>> #0  0x000003ff94dbe274 raise (libc.so.6) +> +>> #1  0x000003ff94da39a8 abort (libc.so.6) +> +>> #2  0x000003ff94db62ce __assert_fail_base (libc.so.6) +> +>> #3  0x000003ff94db634c __assert_fail (libc.so.6) +> +>> #4  0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +> +>> #5  0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +> +>> #6  0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +> +>> #7  0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +> +>> #8  0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +> +>> #9  0x000002aa20db3c34 aio_poll (qemu-system-s390x) +> +>> #10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +> +>> #11 0x000003ff94f879a8 start_thread (libpthread.so.0) +> +>> #12 0x000003ff94e797ee thread_start (libc.so.6) +> +>> +> +>> +> +>> Thanks for taking a look and responding. +> +>> +> +>> Thanks +> +>> Farhan +> +>> +> +>> +> +>> +> +> +> +> Trying to debug a little further, the block device in this case is a "host +> +> device". And looking at your commit carefully you use the +> +> bdrv_attach_aio_context callback to setup a Linux AioContext. +> +> +> +> For some reason the "host device" struct (BlockDriver bdrv_host_device in +> +> block/file-posix.c) does not have a bdrv_attach_aio_context defined. +> +> So a simple change of adding the callback to the struct solves the issue and +> +> the guest starts fine. +> +> +> +> +> +> diff --git a/block/file-posix.c b/block/file-posix.c +> +> index 28824aa..b8d59fb 100644 +> +> --- a/block/file-posix.c +> +> +++ b/block/file-posix.c +> +> @@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { +> +> .bdrv_refresh_limits = raw_refresh_limits, +> +> .bdrv_io_plug = raw_aio_plug, +> +> .bdrv_io_unplug = raw_aio_unplug, +> +> + .bdrv_attach_aio_context = raw_aio_attach_aio_context, +> +> +> +> .bdrv_co_truncate = raw_co_truncate, +> +> .bdrv_getlength = raw_getlength, +> +> +> +> +> +> +> +> I am not too familiar with block device code in QEMU, so not sure if +> +> this is the right fix or if there are some underlying problems. +> +> +Oh this is quite embarassing! I only added the bdrv_attach_aio_context +> +callback for the file-backed device. Your fix is definitely corect for +> +host device. Let me make sure there weren't any others missed and I will +> +send out a properly formatted patch. Thank you for the quick testing and +> +turnaround! +Farhan, can you respin your patch with proper sign-off and patch description? +Adding qemu-block. 
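For completeness, the respun patch Nishanth promises here would presumably need the same one-line addition in every BlockDriver in block/file-posix.c that plugs native AIO but lacks the attach callback; on Linux that also includes the cdrom host driver, not only bdrv_host_device. A hypothetical sketch of that wiring follows, with BlockDriver trimmed to the three relevant members and the raw_* helpers stubbed out so the snippet stands alone.

/* Hypothetical sketch only; the real BlockDriver has many more members. */
typedef struct BlockDriverState BlockDriverState;
typedef struct AioContext AioContext;

typedef struct BlockDriver {
    void (*bdrv_io_plug)(BlockDriverState *bs);
    void (*bdrv_io_unplug)(BlockDriverState *bs);
    void (*bdrv_attach_aio_context)(BlockDriverState *bs, AioContext *ctx);
} BlockDriver;

/* Stubs standing in for the functions block/file-posix.c really provides. */
static void raw_aio_plug(BlockDriverState *bs) { (void)bs; }
static void raw_aio_unplug(BlockDriverState *bs) { (void)bs; }
static void raw_aio_attach_aio_context(BlockDriverState *bs, AioContext *ctx)
{ (void)bs; (void)ctx; }

/* Same shape as Farhan's hunk, applied to the cdrom host driver as well. */
static const BlockDriver bdrv_host_cdrom_sketch = {
    .bdrv_io_plug            = raw_aio_plug,
    .bdrv_io_unplug          = raw_aio_unplug,
    .bdrv_attach_aio_context = raw_aio_attach_aio_context,
};

int main(void)
{
    /* A driver missing the last member would leave this pointer NULL. */
    return bdrv_host_cdrom_sketch.bdrv_attach_aio_context ? 0 : 1;
}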
+ +Hi Christian, + +On 19.07.2018 [08:55:20 +0200], Christian Borntraeger wrote: +> +> +> +On 07/18/2018 08:52 PM, Nishanth Aravamudan wrote: +> +> On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: +> +>> +> +>> +> +>> On 07/18/2018 09:42 AM, Farhan Ali wrote: + + +> +>> I am not too familiar with block device code in QEMU, so not sure if +> +>> this is the right fix or if there are some underlying problems. +> +> +> +> Oh this is quite embarassing! I only added the bdrv_attach_aio_context +> +> callback for the file-backed device. Your fix is definitely corect for +> +> host device. Let me make sure there weren't any others missed and I will +> +> send out a properly formatted patch. Thank you for the quick testing and +> +> turnaround! +> +> +Farhan, can you respin your patch with proper sign-off and patch description? +> +Adding qemu-block. +I sent it yesterday, sorry I didn't cc everyone from this e-mail: +http://lists.nongnu.org/archive/html/qemu-block/2018-07/msg00516.html +Thanks, +Nish + diff --git a/classification_output/01/mistranslation/24930826 b/classification_output/01/mistranslation/24930826 new file mode 100644 index 000000000..5f79c452f --- /dev/null +++ b/classification_output/01/mistranslation/24930826 @@ -0,0 +1,33 @@ +mistranslation: 0.637 +instruction: 0.555 +other: 0.535 +semantic: 0.487 + +[Qemu-devel] [BUG] vhost-user: hot-unplug vhost-user nic for windows guest OS will fail with 100% reproduce rate + +Hi, guys + +I met a problem when hot-unplug vhost-user nic for Windows 2008 rc2 sp1 64 +(Guest OS) + +The xml of nic is as followed: + + + + + + +
+ + +Firstly, I use virsh attach-device win2008 vif.xml to hot-plug a nic for Guest +OS. This operation returns success. +After guest OS discover nic successfully, I use virsh detach-device win2008 +vif.xml to hot-unplug it. This operation will fail with 100% reproduce rate. + +However, if I hot-plug and hot-unplug virtio-net nic , it will not fail. + +I have analysis the process of qmp_device_del , I found that qemu have inject +interrupt to acpi to let it notice guest OS to remove nic. +I guess there is something wrong in Windows when handle the interrupt. + diff --git a/classification_output/01/mistranslation/25842545 b/classification_output/01/mistranslation/25842545 new file mode 100644 index 000000000..1ebfe2889 --- /dev/null +++ b/classification_output/01/mistranslation/25842545 @@ -0,0 +1,202 @@ +mistranslation: 0.928 +other: 0.912 +instruction: 0.835 +semantic: 0.829 + +[Qemu-devel] [Bug?] Guest pause because VMPTRLD failed in KVM + +Hello, + + We encountered a problem that a guest paused because the KMOD report VMPTRLD +failed. + +The related information is as follows: + +1) Qemu command: + /usr/bin/qemu-kvm -name omu1 -S -machine pc-i440fx-2.3,accel=kvm,usb=off -cpu +host -m 15625 -realtime mlock=off -smp 8,sockets=1,cores=8,threads=1 -uuid +a2aacfff-6583-48b4-b6a4-e6830e519931 -no-user-config -nodefaults -chardev +socket,id=charmonitor,path=/var/lib/libvirt/qemu/omu1.monitor,server,nowait +-mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown +-boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -device +virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x5 -drive +file=/home/env/guest1.qcow2,if=none,id=drive-virtio-disk0,format=qcow2,cache=none,aio=native + -device +virtio-blk-pci,scsi=off,bus=pci.0,addr=0x6,drive=drive-virtio-disk0,id=virtio-disk0 + -drive +file=/home/env/guest_300G.img,if=none,id=drive-virtio-disk1,format=raw,cache=none,aio=native + -device +virtio-blk-pci,scsi=off,bus=pci.0,addr=0x7,drive=drive-virtio-disk1,id=virtio-disk1 + -netdev tap,fd=25,id=hostnet0,vhost=on,vhostfd=26 -device +virtio-net-pci,netdev=hostnet0,id=net0,mac=00:00:80:05:00:00,bus=pci.0,addr=0x3 +-netdev tap,fd=27,id=hostnet1,vhost=on,vhostfd=28 -device +virtio-net-pci,netdev=hostnet1,id=net1,mac=00:00:80:05:00:01,bus=pci.0,addr=0x4 +-chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 +-device usb-tablet,id=input0 -vnc 0.0.0.0:0 -device +cirrus-vga,id=video0,vgamem_mb=16,bus=pci.0,addr=0x2 -device +virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x8 -msg timestamp=on + + 2) Qemu log: + KVM: entry failed, hardware error 0x4 + RAX=00000000ffffffed RBX=ffff8803fa2d7fd8 RCX=0100000000000000 +RDX=0000000000000000 + RSI=0000000000000000 RDI=0000000000000046 RBP=ffff8803fa2d7e90 +RSP=ffff8803fa2efe90 + R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000 +R11=000000000000b69a + R12=0000000000000001 R13=ffffffff81a25b40 R14=0000000000000000 +R15=ffff8803fa2d7fd8 + RIP=ffffffff81053e16 RFL=00000286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 + ES =0000 0000000000000000 ffffffff 00c00000 + CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA] + SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS [-WA] + DS =0000 0000000000000000 ffffffff 00c00000 + FS =0000 0000000000000000 ffffffff 00c00000 + GS =0000 ffff88040f540000 ffffffff 00c00000 + LDT=0000 0000000000000000 ffffffff 00c00000 + TR =0040 ffff88040f550a40 00002087 00008b00 DPL=0 TSS64-busy + GDT= ffff88040f549000 0000007f + IDT= ffffffffff529000 00000fff + CR0=80050033 
CR2=00007f81ca0c5000 CR3=00000003f5081000 CR4=000407e0 + DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 +DR3=0000000000000000 + DR6=00000000ffff0ff0 DR7=0000000000000400 + EFER=0000000000000d01 + Code=?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? +?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? + + 3) Demsg + [347315.028339] kvm: vmptrld ffff8817ec5f0000/17ec5f0000 failed + klogd 1.4.1, ---------- state change ---------- + [347315.039506] kvm: vmptrld ffff8817ec5f0000/17ec5f0000 failed + [347315.051728] kvm: vmptrld ffff8817ec5f0000/17ec5f0000 failed + [347315.057472] vmwrite error: reg 6c0a value ffff88307e66e480 (err +2120672384) + [347315.064567] Pid: 69523, comm: qemu-kvm Tainted: GF X +3.0.93-0.8-default #1 + [347315.064569] Call Trace: + [347315.064587] [] dump_trace+0x75/0x300 + [347315.064595] [] dump_stack+0x69/0x6f + [347315.064617] [] vmx_vcpu_load+0x11e/0x1d0 [kvm_intel] + [347315.064647] [] kvm_arch_vcpu_load+0x44/0x1d0 [kvm] + [347315.064669] [] finish_task_switch+0x81/0xe0 + [347315.064676] [] thread_return+0x3b/0x2a7 + [347315.064687] [] kvm_vcpu_block+0x65/0xa0 [kvm] + [347315.064703] [] __vcpu_run+0xd1/0x260 [kvm] + [347315.064732] [] kvm_arch_vcpu_ioctl_run+0x68/0x1a0 +[kvm] + [347315.064759] [] kvm_vcpu_ioctl+0x38e/0x580 [kvm] + [347315.064771] [] do_vfs_ioctl+0x8b/0x3b0 + [347315.064776] [] sys_ioctl+0xa1/0xb0 + [347315.064783] [] system_call_fastpath+0x16/0x1b + [347315.064797] [<00007fee51969ce7>] 0x7fee51969ce6 + [347315.064799] vmwrite error: reg 6c0c value ffff88307e664000 (err +2120630272) + [347315.064802] Pid: 69523, comm: qemu-kvm Tainted: GF X +3.0.93-0.8-default #1 + [347315.064803] Call Trace: + [347315.064807] [] dump_trace+0x75/0x300 + [347315.064811] [] dump_stack+0x69/0x6f + [347315.064817] [] vmx_vcpu_load+0x12c/0x1d0 [kvm_intel] + [347315.064832] [] kvm_arch_vcpu_load+0x44/0x1d0 [kvm] + [347315.064851] [] finish_task_switch+0x81/0xe0 + [347315.064855] [] thread_return+0x3b/0x2a7 + [347315.064865] [] kvm_vcpu_block+0x65/0xa0 [kvm] + [347315.064880] [] __vcpu_run+0xd1/0x260 [kvm] + [347315.064907] [] kvm_arch_vcpu_ioctl_run+0x68/0x1a0 +[kvm] + [347315.064933] [] kvm_vcpu_ioctl+0x38e/0x580 [kvm] + [347315.064943] [] do_vfs_ioctl+0x8b/0x3b0 + [347315.064947] [] sys_ioctl+0xa1/0xb0 + [347315.064951] [] system_call_fastpath+0x16/0x1b + [347315.064957] [<00007fee51969ce7>] 0x7fee51969ce6 + [347315.064959] vmwrite error: reg 6c10 value 0 (err 0) + + 4) The isssue can't be reporduced. I search the Intel VMX sepc about reaseons +of vmptrld failure: + The instruction fails if its operand is not properly aligned, sets +unsupported physical-address bits, or is equal to the VMXON + pointer. In addition, the instruction fails if the 32 bits in memory +referenced by the operand do not match the VMCS + revision identifier supported by this processor. + + But I can't find any cues from the KVM source code. It seems each + error conditions is impossible in theory. :( + +Any suggestions will be appreciated! Paolo? + +-- +Regards, +-Gonglei + +On 10/11/2016 15:10, gong lei wrote: +> +4) The isssue can't be reporduced. I search the Intel VMX sepc about +> +reaseons +> +of vmptrld failure: +> +The instruction fails if its operand is not properly aligned, sets +> +unsupported physical-address bits, or is equal to the VMXON +> +pointer. In addition, the instruction fails if the 32 bits in memory +> +referenced by the operand do not match the VMCS +> +revision identifier supported by this processor. 
+> +> +But I can't find any cues from the KVM source code. It seems each +> +error conditions is impossible in theory. :( +Yes, it should not happen. :( + +If it's not reproducible, it's really hard to say what it was, except a +random memory corruption elsewhere or even a bit flip (!). + +Paolo + +On 2016/11/17 20:39, Paolo Bonzini wrote: +> +> +On 10/11/2016 15:10, gong lei wrote: +> +> 4) The isssue can't be reporduced. I search the Intel VMX sepc about +> +> reaseons +> +> of vmptrld failure: +> +> The instruction fails if its operand is not properly aligned, sets +> +> unsupported physical-address bits, or is equal to the VMXON +> +> pointer. In addition, the instruction fails if the 32 bits in memory +> +> referenced by the operand do not match the VMCS +> +> revision identifier supported by this processor. +> +> +> +> But I can't find any cues from the KVM source code. It seems each +> +> error conditions is impossible in theory. :( +> +Yes, it should not happen. :( +> +> +If it's not reproducible, it's really hard to say what it was, except a +> +random memory corruption elsewhere or even a bit flip (!). +> +> +Paolo +Thanks for your reply, Paolo :) + +-- +Regards, +-Gonglei + diff --git a/classification_output/01/mistranslation/26430026 b/classification_output/01/mistranslation/26430026 new file mode 100644 index 000000000..ead1f32fd --- /dev/null +++ b/classification_output/01/mistranslation/26430026 @@ -0,0 +1,165 @@ +mistranslation: 0.915 +semantic: 0.904 +instruction: 0.888 +other: 0.813 + +[BUG] cxl,i386: e820 mappings may not be correct for cxl + +Context included below from prior discussion + - `cxl create-region` would fail on inability to allocate memory + - traced this down to the memory region being marked RESERVED + - E820 map marks the CXL fixed memory window as RESERVED + + +Re: x86 errors, I found that region worked with this patch. (I also +added the SRAT patches the Davidlohr posted, but I do not think they are +relevant). + +I don't think this is correct, and setting this to E820_RAM causes the +system to fail to boot at all, but with this change `cxl create-region` +succeeds, which suggests our e820 mappings in the i386 machine are +incorrect. + +Anyone who can help or have an idea as to what e820 should actually be +doing with this region, or if this is correct and something else is +failing, please help! + + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..a5e688a742 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1077,7 +1077,7 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +- e820_add_entry(fw->base, fw->size, E820_RESERVED); ++ e820_add_entry(fw->base, fw->size, E820_NVS); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + + +On Mon, Oct 10, 2022 at 05:32:42PM +0100, Jonathan Cameron wrote: +> +> +> > but i'm not sure of what to do with this info. We have some proof +> +> > that real hardware works with this no problem, and the only difference +> +> > is that the EFI/bios/firmware is setting the memory regions as `usable` +> +> > or `soft reserved`, which would imply the EDK2 is the blocker here +> +> > regardless of the OS driver status. +> +> > +> +> > But I'd seen elsewhere you had gotten some of this working, and I'm +> +> > failing to get anything working at the moment. If you have any input i +> +> > would greatly appreciate the help. 
+> +> > +> +> > QEMU config: +> +> > +> +> > /opt/qemu-cxl2/bin/qemu-system-x86_64 \ +> +> > -drive +> +> > file=/var/lib/libvirt/images/cxl.qcow2,format=qcow2,index=0,media=d\ +> +> > -m 2G,slots=4,maxmem=4G \ +> +> > -smp 4 \ +> +> > -machine type=q35,accel=kvm,cxl=on \ +> +> > -enable-kvm \ +> +> > -nographic \ +> +> > -device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 \ +> +> > -device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 \ +> +> > -object memory-backend-file,id=cxl-mem0,mem-path=/tmp/cxl-mem0,size=256M \ +> +> > -object memory-backend-file,id=lsa0,mem-path=/tmp/cxl-lsa0,size=256M \ +> +> > -device cxl-type3,bus=rp0,pmem=true,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 +> +> > \ +> +> > -M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=256M +> +> > +> +> > I'd seen on the lists that you had seen issues with single-rp setups, +> +> > but no combination of configuration I've tried (including all the ones +> +> > in the docs and tests) lead to a successful region creation with +> +> > `cxl create-region` +> +> +> +> Hmm. Let me have a play. I've not run x86 tests for a while so +> +> perhaps something is missing there. +> +> +> +> I'm carrying a patch to override check_last_peer() in +> +> cxl_port_setup_targets() as that is wrong for some combinations, +> +> but that doesn't look like it's related to what you are seeing. +> +> +I'm not sure if it's relevant, but turned out I'd forgotten I'm carrying 3 +> +patches that aren't upstream (and one is a horrible hack). +> +> +Hack: +https://lore.kernel.org/linux-cxl/20220819094655.000005ed@huawei.com/ +> +Shouldn't affect a simple case like this... +> +> +https://lore.kernel.org/linux-cxl/20220819093133.00006c22@huawei.com/T/#t +> +(Dan's version) +> +> +https://lore.kernel.org/linux-cxl/20220815154044.24733-1-Jonathan.Cameron@huawei.com/T/#t +> +> +For writes to work you will currently need two rps (nothing on the second is +> +fine) +> +as we still haven't resolved if the kernel should support an HDM decoder on +> +a host bridge with one port. I think it should (Spec allows it), others +> +unconvinced. +> +> +Note I haven't shifted over to x86 yet so may still be something different +> +from +> +arm64. 
+> +> +Jonathan +> +> + diff --git a/classification_output/01/mistranslation/36568044 b/classification_output/01/mistranslation/36568044 new file mode 100644 index 000000000..719c03c74 --- /dev/null +++ b/classification_output/01/mistranslation/36568044 @@ -0,0 +1,4581 @@ +mistranslation: 0.962 +instruction: 0.930 +other: 0.930 +semantic: 0.923 + +[BUG, RFC] cpr-transfer: qxl guest driver crashes after migration + +Hi all, + +We've been experimenting with cpr-transfer migration mode recently and +have discovered the following issue with the guest QXL driver: + +Run migration source: +> +EMULATOR=/path/to/emulator +> +ROOTFS=/path/to/image +> +QMPSOCK=/var/run/alma8qmp-src.sock +> +> +$EMULATOR -enable-kvm \ +> +-machine q35 \ +> +-cpu host -smp 2 -m 2G \ +> +-object +> +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ +> +-machine memory-backend=ram0 \ +> +-machine aux-ram-share=on \ +> +-drive file=$ROOTFS,media=disk,if=virtio \ +> +-qmp unix:$QMPSOCK,server=on,wait=off \ +> +-nographic \ +> +-device qxl-vga +Run migration target: +> +EMULATOR=/path/to/emulator +> +ROOTFS=/path/to/image +> +QMPSOCK=/var/run/alma8qmp-dst.sock +> +> +> +> +$EMULATOR -enable-kvm \ +> +-machine q35 \ +> +-cpu host -smp 2 -m 2G \ +> +-object +> +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ +> +-machine memory-backend=ram0 \ +> +-machine aux-ram-share=on \ +> +-drive file=$ROOTFS,media=disk,if=virtio \ +> +-qmp unix:$QMPSOCK,server=on,wait=off \ +> +-nographic \ +> +-device qxl-vga \ +> +-incoming tcp:0:44444 \ +> +-incoming '{"channel-type": "cpr", "addr": { "transport": "socket", +> +"type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +> +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +> +QMPSOCK=/var/run/alma8qmp-src.sock +> +> +$QMPSHELL -p $QMPSOCK < +migrate-set-parameters mode=cpr-transfer +> +migrate +> +channels=[{"channel-type":"main","addr":{"transport":"socket","type":"inet","host":"0","port":"44444"}},{"channel-type":"cpr","addr":{"transport":"socket","type":"unix","path":"/var/run/alma8cpr-dst.sock"}}] +> +EOF +Then, after a while, QXL guest driver on target crashes spewing the +following messages: +> +[ 73.962002] [TTM] Buffer eviction failed +> +[ 73.962072] qxl 0000:00:02.0: object_init failed for (3149824, 0x00000001) +> +[ 73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to allocate +> +VRAM BO +That seems to be a known kernel QXL driver bug: +https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/ +https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ +(the latter discussion contains that reproduce script which speeds up +the crash in the guest): +> +#!/bin/bash +> +> +chvt 3 +> +> +for j in $(seq 80); do +> +echo "$(date) starting round $j" +> +if [ "$(journalctl --boot | grep "failed to allocate VRAM BO")" != "" +> +]; then +> +echo "bug was reproduced after $j tries" +> +exit 1 +> +fi +> +for i in $(seq 100); do +> +dmesg > /dev/tty3 +> +done +> +done +> +> +echo "bug could not be reproduced" +> +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM. 
However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into this? Any +suggestions would be appreciated. Thanks! + +Andrey + +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ + -machine q35 \ + -cpu host -smp 2 -m 2G \ + -object +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ + -machine memory-backend=ram0 \ + -machine aux-ram-share=on \ + -drive file=$ROOTFS,media=disk,if=virtio \ + -qmp unix:$QMPSOCK,server=on,wait=off \ + -nographic \ + -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +-machine q35 \ + -cpu host -smp 2 -m 2G \ + -object +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ + -machine memory-backend=ram0 \ + -machine aux-ram-share=on \ + -drive file=$ROOTFS,media=disk,if=virtio \ + -qmp unix:$QMPSOCK,server=on,wait=off \ + -nographic \ + -device qxl-vga \ + -incoming tcp:0:44444 \ + -incoming '{"channel-type": "cpr", "addr": { "transport": "socket", "type": "unix", +"path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 + done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM. However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into this? Any +suggestions would be appreciated. Thanks! +Possibly some memory region created by qxl is not being preserved. 
+Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' + +- Steve + +On 2/28/2025 1:13 PM, Steven Sistare wrote: +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ +     -machine q35 \ +     -cpu host -smp 2 -m 2G \ +     -object +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ +     -machine memory-backend=ram0 \ +     -machine aux-ram-share=on \ +     -drive file=$ROOTFS,media=disk,if=virtio \ +     -qmp unix:$QMPSOCK,server=on,wait=off \ +     -nographic \ +     -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +     -machine q35 \ +     -cpu host -smp 2 -m 2G \ +     -object +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ +     -machine memory-backend=ram0 \ +     -machine aux-ram-share=on \ +     -drive file=$ROOTFS,media=disk,if=virtio \ +     -qmp unix:$QMPSOCK,server=on,wait=off \ +     -nographic \ +     -device qxl-vga \ +     -incoming tcp:0:44444 \ +     -incoming '{"channel-type": "cpr", "addr": { "transport": "socket", "type": "unix", +"path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 +         done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM.  However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into this?  Any +suggestions would be appreciated.  Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr. A message is printed at migration start time. 
+1740667681-257312-1-git-send-email-steven.sistare@oracle.com +/">https://lore.kernel.org/qemu-devel/ +1740667681-257312-1-git-send-email-steven.sistare@oracle.com +/ +- Steve + +On 2/28/25 8:20 PM, Steven Sistare wrote: +> +On 2/28/2025 1:13 PM, Steven Sistare wrote: +> +> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +> +>> Hi all, +> +>> +> +>> We've been experimenting with cpr-transfer migration mode recently and +> +>> have discovered the following issue with the guest QXL driver: +> +>> +> +>> Run migration source: +> +>>> EMULATOR=/path/to/emulator +> +>>> ROOTFS=/path/to/image +> +>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>> +> +>>> $EMULATOR -enable-kvm \ +> +>>>      -machine q35 \ +> +>>>      -cpu host -smp 2 -m 2G \ +> +>>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>> ram0,share=on\ +> +>>>      -machine memory-backend=ram0 \ +> +>>>      -machine aux-ram-share=on \ +> +>>>      -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>      -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>      -nographic \ +> +>>>      -device qxl-vga +> +>> +> +>> Run migration target: +> +>>> EMULATOR=/path/to/emulator +> +>>> ROOTFS=/path/to/image +> +>>> QMPSOCK=/var/run/alma8qmp-dst.sock +> +>>> $EMULATOR -enable-kvm \ +> +>>>      -machine q35 \ +> +>>>      -cpu host -smp 2 -m 2G \ +> +>>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>> ram0,share=on\ +> +>>>      -machine memory-backend=ram0 \ +> +>>>      -machine aux-ram-share=on \ +> +>>>      -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>      -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>      -nographic \ +> +>>>      -device qxl-vga \ +> +>>>      -incoming tcp:0:44444 \ +> +>>>      -incoming '{"channel-type": "cpr", "addr": { "transport": +> +>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +> +>> +> +>> +> +>> Launch the migration: +> +>>> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +> +>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>> +> +>>> $QMPSHELL -p $QMPSOCK < +>>>      migrate-set-parameters mode=cpr-transfer +> +>>>      migrate channels=[{"channel-type":"main","addr": +> +>>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, +> +>>> {"channel-type":"cpr","addr": +> +>>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- +> +>>> dst.sock"}}] +> +>>> EOF +> +>> +> +>> Then, after a while, QXL guest driver on target crashes spewing the +> +>> following messages: +> +>>> [   73.962002] [TTM] Buffer eviction failed +> +>>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, +> +>>> 0x00000001) +> +>>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to +> +>>> allocate VRAM BO +> +>> +> +>> That seems to be a known kernel QXL driver bug: +> +>> +> +>> +https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/ +> +>> +https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ +> +>> +> +>> (the latter discussion contains that reproduce script which speeds up +> +>> the crash in the guest): +> +>>> #!/bin/bash +> +>>> +> +>>> chvt 3 +> +>>> +> +>>> for j in $(seq 80); do +> +>>>          echo "$(date) starting round $j" +> +>>>          if [ "$(journalctl --boot | grep "failed to allocate VRAM +> +>>> BO")" != "" ]; then +> +>>>                  echo "bug was reproduced after $j tries" +> +>>>                  exit 1 +> +>>>          fi +> +>>>          for i in $(seq 100); do +> +>>>                  dmesg > /dev/tty3 +> +>>>          done +> +>>> done +> +>>> +> +>>> 
echo "bug could not be reproduced" +> +>>> exit 0 +> +>> +> +>> The bug itself seems to remain unfixed, as I was able to reproduce that +> +>> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +> +>> cpr-transfer code also seems to be buggy as it triggers the crash - +> +>> without the cpr-transfer migration the above reproduce doesn't lead to +> +>> crash on the source VM. +> +>> +> +>> I suspect that, as cpr-transfer doesn't migrate the guest memory, but +> +>> rather passes it through the memory backend object, our code might +> +>> somehow corrupt the VRAM.  However, I wasn't able to trace the +> +>> corruption so far. +> +>> +> +>> Could somebody help the investigation and take a look into this?  Any +> +>> suggestions would be appreciated.  Thanks! +> +> +> +> Possibly some memory region created by qxl is not being preserved. +> +> Try adding these traces to see what is preserved: +> +> +> +> -trace enable='*cpr*' +> +> -trace enable='*ram_alloc*' +> +> +Also try adding this patch to see if it flags any ram blocks as not +> +compatible with cpr.  A message is printed at migration start time. +> + +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send-email- +> +steven.sistare@oracle.com/ +> +> +- Steve +> +With the traces enabled + the "migration: ram block cpr blockers" patch +applied: + +Source: +> +cpr_find_fd pc.bios, id 0 returns -1 +> +cpr_save_fd pc.bios, id 0, fd 22 +> +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +> +0x7fec18e00000 +> +cpr_find_fd pc.rom, id 0 returns -1 +> +cpr_save_fd pc.rom, id 0, fd 23 +> +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +> +0x7fec18c00000 +> +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +> +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +> +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd +> +24 host 0x7fec18a00000 +> +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +> +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +> +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 +> +fd 25 host 0x7feb77e00000 +> +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +> +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +> +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 27 +> +host 0x7fec18800000 +> +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +> +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +> +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 +> +fd 28 host 0x7feb73c00000 +> +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +> +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +> +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 34 +> +host 0x7fec18600000 +> +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +> +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +> +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 35 +> +host 0x7fec18200000 +> +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +> +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +> +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 36 +> +host 0x7feb8b600000 +> +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +> +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +> +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 37 host +> +0x7feb8b400000 +> +> +cpr_state_save cpr-transfer mode +> +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +> +cpr_transfer_input /var/run/alma8cpr-dst.sock +> +cpr_state_load cpr-transfer 
mode +> +cpr_find_fd pc.bios, id 0 returns 20 +> +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +> +0x7fcdc9800000 +> +cpr_find_fd pc.rom, id 0 returns 19 +> +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +> +0x7fcdc9600000 +> +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +> +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd +> +18 host 0x7fcdc9400000 +> +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +> +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 +> +fd 17 host 0x7fcd27e00000 +> +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +> +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 16 +> +host 0x7fcdc9200000 +> +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +> +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 +> +fd 15 host 0x7fcd23c00000 +> +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +> +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 14 +> +host 0x7fcdc8800000 +> +cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +> +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 13 +> +host 0x7fcdc8400000 +> +cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +> +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 11 +> +host 0x7fcdc8200000 +> +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +> +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 10 host +> +0x7fcd3be00000 +Looks like both vga.vram and qxl.vram are being preserved (with the same +addresses), and no incompatible ram blocks are found during migration. + +Andrey + +On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +> +On 2/28/25 8:20 PM, Steven Sistare wrote: +> +> On 2/28/2025 1:13 PM, Steven Sistare wrote: +> +>> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +> +>>> Hi all, +> +>>> +> +>>> We've been experimenting with cpr-transfer migration mode recently and +> +>>> have discovered the following issue with the guest QXL driver: +> +>>> +> +>>> Run migration source: +> +>>>> EMULATOR=/path/to/emulator +> +>>>> ROOTFS=/path/to/image +> +>>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>>> +> +>>>> $EMULATOR -enable-kvm \ +> +>>>>      -machine q35 \ +> +>>>>      -cpu host -smp 2 -m 2G \ +> +>>>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>>> ram0,share=on\ +> +>>>>      -machine memory-backend=ram0 \ +> +>>>>      -machine aux-ram-share=on \ +> +>>>>      -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>>      -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>>      -nographic \ +> +>>>>      -device qxl-vga +> +>>> +> +>>> Run migration target: +> +>>>> EMULATOR=/path/to/emulator +> +>>>> ROOTFS=/path/to/image +> +>>>> QMPSOCK=/var/run/alma8qmp-dst.sock +> +>>>> $EMULATOR -enable-kvm \ +> +>>>>      -machine q35 \ +> +>>>>      -cpu host -smp 2 -m 2G \ +> +>>>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>>> ram0,share=on\ +> +>>>>      -machine memory-backend=ram0 \ +> +>>>>      -machine aux-ram-share=on \ +> +>>>>      -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>>      -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>>      -nographic \ +> +>>>>      -device qxl-vga \ +> +>>>>      -incoming tcp:0:44444 \ +> +>>>>      -incoming '{"channel-type": "cpr", "addr": { "transport": +> +>>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +> +>>> +> +>>> +> +>>> Launch the migration: +> +>>>> 
QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +> +>>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>>> +> +>>>> $QMPSHELL -p $QMPSOCK < +>>>>      migrate-set-parameters mode=cpr-transfer +> +>>>>      migrate channels=[{"channel-type":"main","addr": +> +>>>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, +> +>>>> {"channel-type":"cpr","addr": +> +>>>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- +> +>>>> dst.sock"}}] +> +>>>> EOF +> +>>> +> +>>> Then, after a while, QXL guest driver on target crashes spewing the +> +>>> following messages: +> +>>>> [   73.962002] [TTM] Buffer eviction failed +> +>>>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, +> +>>>> 0x00000001) +> +>>>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to +> +>>>> allocate VRAM BO +> +>>> +> +>>> That seems to be a known kernel QXL driver bug: +> +>>> +> +>>> +https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/ +> +>>> +https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ +> +>>> +> +>>> (the latter discussion contains that reproduce script which speeds up +> +>>> the crash in the guest): +> +>>>> #!/bin/bash +> +>>>> +> +>>>> chvt 3 +> +>>>> +> +>>>> for j in $(seq 80); do +> +>>>>          echo "$(date) starting round $j" +> +>>>>          if [ "$(journalctl --boot | grep "failed to allocate VRAM +> +>>>> BO")" != "" ]; then +> +>>>>                  echo "bug was reproduced after $j tries" +> +>>>>                  exit 1 +> +>>>>          fi +> +>>>>          for i in $(seq 100); do +> +>>>>                  dmesg > /dev/tty3 +> +>>>>          done +> +>>>> done +> +>>>> +> +>>>> echo "bug could not be reproduced" +> +>>>> exit 0 +> +>>> +> +>>> The bug itself seems to remain unfixed, as I was able to reproduce that +> +>>> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +> +>>> cpr-transfer code also seems to be buggy as it triggers the crash - +> +>>> without the cpr-transfer migration the above reproduce doesn't lead to +> +>>> crash on the source VM. +> +>>> +> +>>> I suspect that, as cpr-transfer doesn't migrate the guest memory, but +> +>>> rather passes it through the memory backend object, our code might +> +>>> somehow corrupt the VRAM.  However, I wasn't able to trace the +> +>>> corruption so far. +> +>>> +> +>>> Could somebody help the investigation and take a look into this?  Any +> +>>> suggestions would be appreciated.  Thanks! +> +>> +> +>> Possibly some memory region created by qxl is not being preserved. +> +>> Try adding these traces to see what is preserved: +> +>> +> +>> -trace enable='*cpr*' +> +>> -trace enable='*ram_alloc*' +> +> +> +> Also try adding this patch to see if it flags any ram blocks as not +> +> compatible with cpr.  A message is printed at migration start time. 
+> +>  +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send-email- +> +> steven.sistare@oracle.com/ +> +> +> +> - Steve +> +> +> +> +With the traces enabled + the "migration: ram block cpr blockers" patch +> +applied: +> +> +Source: +> +> cpr_find_fd pc.bios, id 0 returns -1 +> +> cpr_save_fd pc.bios, id 0, fd 22 +> +> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +> +> 0x7fec18e00000 +> +> cpr_find_fd pc.rom, id 0 returns -1 +> +> cpr_save_fd pc.rom, id 0, fd 23 +> +> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +> +> 0x7fec18c00000 +> +> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +> +> cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +> +> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd +> +> 24 host 0x7fec18a00000 +> +> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +> +> cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +> +> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 +> +> fd 25 host 0x7feb77e00000 +> +> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +> +> cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +> +> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 27 +> +> host 0x7fec18800000 +> +> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +> +> cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +> +> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 +> +> fd 28 host 0x7feb73c00000 +> +> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +> +> cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +> +> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 34 +> +> host 0x7fec18600000 +> +> cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +> +> cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +> +> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd +> +> 35 host 0x7fec18200000 +> +> cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +> +> cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +> +> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 36 +> +> host 0x7feb8b600000 +> +> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +> +> cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +> +> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 37 host +> +> 0x7feb8b400000 +> +> +> +> cpr_state_save cpr-transfer mode +> +> cpr_transfer_output /var/run/alma8cpr-dst.sock +> +> +Target: +> +> cpr_transfer_input /var/run/alma8cpr-dst.sock +> +> cpr_state_load cpr-transfer mode +> +> cpr_find_fd pc.bios, id 0 returns 20 +> +> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +> +> 0x7fcdc9800000 +> +> cpr_find_fd pc.rom, id 0 returns 19 +> +> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +> +> 0x7fcdc9600000 +> +> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +> +> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd +> +> 18 host 0x7fcdc9400000 +> +> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +> +> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 +> +> fd 17 host 0x7fcd27e00000 +> +> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +> +> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 16 +> +> host 0x7fcdc9200000 +> +> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +> +> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 +> +> fd 15 host 0x7fcd23c00000 +> +> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 
14 +> +> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 14 +> +> host 0x7fcdc8800000 +> +> cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +> +> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd +> +> 13 host 0x7fcdc8400000 +> +> cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +> +> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 11 +> +> host 0x7fcdc8200000 +> +> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +> +> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 10 host +> +> 0x7fcd3be00000 +> +> +Looks like both vga.vram and qxl.vram are being preserved (with the same +> +addresses), and no incompatible ram blocks are found during migration. +> +Sorry, addressed are not the same, of course. However corresponding ram +blocks do seem to be preserved and initialized. + +On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +On 2/28/25 8:20 PM, Steven Sistare wrote: +On 2/28/2025 1:13 PM, Steven Sistare wrote: +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ +      -machine q35 \ +      -cpu host -smp 2 -m 2G \ +      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +ram0,share=on\ +      -machine memory-backend=ram0 \ +      -machine aux-ram-share=on \ +      -drive file=$ROOTFS,media=disk,if=virtio \ +      -qmp unix:$QMPSOCK,server=on,wait=off \ +      -nographic \ +      -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +      -machine q35 \ +      -cpu host -smp 2 -m 2G \ +      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +ram0,share=on\ +      -machine memory-backend=ram0 \ +      -machine aux-ram-share=on \ +      -drive file=$ROOTFS,media=disk,if=virtio \ +      -qmp unix:$QMPSOCK,server=on,wait=off \ +      -nographic \ +      -device qxl-vga \ +      -incoming tcp:0:44444 \ +      -incoming '{"channel-type": "cpr", "addr": { "transport": +"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 +          done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM.  However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into this?  Any +suggestions would be appreciated.  Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr.  
A message is printed at migration start time. +  +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send-email- +steven.sistare@oracle.com/ + +- Steve +With the traces enabled + the "migration: ram block cpr blockers" patch +applied: + +Source: +cpr_find_fd pc.bios, id 0 returns -1 +cpr_save_fd pc.bios, id 0, fd 22 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +0x7fec18e00000 +cpr_find_fd pc.rom, id 0 returns -1 +cpr_save_fd pc.rom, id 0, fd 23 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +0x7fec18c00000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd 24 +host 0x7fec18a00000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 fd +25 host 0x7feb77e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 27 host +0x7fec18800000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 fd +28 host 0x7feb73c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 34 host +0x7fec18600000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 35 +host 0x7fec18200000 +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 36 host +0x7feb8b600000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 37 host +0x7feb8b400000 + +cpr_state_save cpr-transfer mode +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +cpr_transfer_input /var/run/alma8cpr-dst.sock +cpr_state_load cpr-transfer mode +cpr_find_fd pc.bios, id 0 returns 20 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +0x7fcdc9800000 +cpr_find_fd pc.rom, id 0 returns 19 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +0x7fcdc9600000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd 18 +host 0x7fcdc9400000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 fd +17 host 0x7fcd27e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 16 host +0x7fcdc9200000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 fd +15 host 0x7fcd23c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 14 host +0x7fcdc8800000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 13 +host 0x7fcdc8400000 +cpr_find_fd /rom@etc/table-loader, id 0 returns 11 
+qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 11 host +0x7fcdc8200000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 10 host +0x7fcd3be00000 +Looks like both vga.vram and qxl.vram are being preserved (with the same +addresses), and no incompatible ram blocks are found during migration. +Sorry, addressed are not the same, of course. However corresponding ram +blocks do seem to be preserved and initialized. +So far, I have not reproduced the guest driver failure. + +However, I have isolated places where new QEMU improperly writes to +the qxl memory regions prior to starting the guest, by mmap'ing them +readonly after cpr: + + qemu_ram_alloc_internal() + if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) + ram_flags |= RAM_READONLY; + new_block = qemu_ram_alloc_from_fd(...) + +I have attached a draft fix; try it and let me know. +My console window looks fine before and after cpr, using +-vnc $hostip:0 -vga qxl + +- Steve +0001-hw-qxl-cpr-support-preliminary.patch +Description: +Text document + +On 3/4/25 9:05 PM, Steven Sistare wrote: +> +On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +> +> On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +> +>> On 2/28/25 8:20 PM, Steven Sistare wrote: +> +>>> On 2/28/2025 1:13 PM, Steven Sistare wrote: +> +>>>> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +> +>>>>> Hi all, +> +>>>>> +> +>>>>> We've been experimenting with cpr-transfer migration mode recently +> +>>>>> and +> +>>>>> have discovered the following issue with the guest QXL driver: +> +>>>>> +> +>>>>> Run migration source: +> +>>>>>> EMULATOR=/path/to/emulator +> +>>>>>> ROOTFS=/path/to/image +> +>>>>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>>>>> +> +>>>>>> $EMULATOR -enable-kvm \ +> +>>>>>>       -machine q35 \ +> +>>>>>>       -cpu host -smp 2 -m 2G \ +> +>>>>>>       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>>>>> ram0,share=on\ +> +>>>>>>       -machine memory-backend=ram0 \ +> +>>>>>>       -machine aux-ram-share=on \ +> +>>>>>>       -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>>>>       -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>>>>       -nographic \ +> +>>>>>>       -device qxl-vga +> +>>>>> +> +>>>>> Run migration target: +> +>>>>>> EMULATOR=/path/to/emulator +> +>>>>>> ROOTFS=/path/to/image +> +>>>>>> QMPSOCK=/var/run/alma8qmp-dst.sock +> +>>>>>> $EMULATOR -enable-kvm \ +> +>>>>>>       -machine q35 \ +> +>>>>>>       -cpu host -smp 2 -m 2G \ +> +>>>>>>       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +> +>>>>>> ram0,share=on\ +> +>>>>>>       -machine memory-backend=ram0 \ +> +>>>>>>       -machine aux-ram-share=on \ +> +>>>>>>       -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>>>>       -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>>>>       -nographic \ +> +>>>>>>       -device qxl-vga \ +> +>>>>>>       -incoming tcp:0:44444 \ +> +>>>>>>       -incoming '{"channel-type": "cpr", "addr": { "transport": +> +>>>>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +> +>>>>> +> +>>>>> +> +>>>>> Launch the migration: +> +>>>>>> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +> +>>>>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>>>>> +> +>>>>>> $QMPSHELL -p $QMPSOCK < +>>>>>>       migrate-set-parameters mode=cpr-transfer +> +>>>>>>       migrate channels=[{"channel-type":"main","addr": +> +>>>>>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, +> +>>>>>> {"channel-type":"cpr","addr": +> 
+>>>>>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- +> +>>>>>> dst.sock"}}] +> +>>>>>> EOF +> +>>>>> +> +>>>>> Then, after a while, QXL guest driver on target crashes spewing the +> +>>>>> following messages: +> +>>>>>> [   73.962002] [TTM] Buffer eviction failed +> +>>>>>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, +> +>>>>>> 0x00000001) +> +>>>>>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to +> +>>>>>> allocate VRAM BO +> +>>>>> +> +>>>>> That seems to be a known kernel QXL driver bug: +> +>>>>> +> +>>>>> +https://lore.kernel.org/all/20220907094423.93581-1- +> +>>>>> min_halo@163.com/T/ +> +>>>>> +https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ +> +>>>>> +> +>>>>> (the latter discussion contains that reproduce script which speeds up +> +>>>>> the crash in the guest): +> +>>>>>> #!/bin/bash +> +>>>>>> +> +>>>>>> chvt 3 +> +>>>>>> +> +>>>>>> for j in $(seq 80); do +> +>>>>>>           echo "$(date) starting round $j" +> +>>>>>>           if [ "$(journalctl --boot | grep "failed to allocate VRAM +> +>>>>>> BO")" != "" ]; then +> +>>>>>>                   echo "bug was reproduced after $j tries" +> +>>>>>>                   exit 1 +> +>>>>>>           fi +> +>>>>>>           for i in $(seq 100); do +> +>>>>>>                   dmesg > /dev/tty3 +> +>>>>>>           done +> +>>>>>> done +> +>>>>>> +> +>>>>>> echo "bug could not be reproduced" +> +>>>>>> exit 0 +> +>>>>> +> +>>>>> The bug itself seems to remain unfixed, as I was able to reproduce +> +>>>>> that +> +>>>>> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +> +>>>>> cpr-transfer code also seems to be buggy as it triggers the crash - +> +>>>>> without the cpr-transfer migration the above reproduce doesn't +> +>>>>> lead to +> +>>>>> crash on the source VM. +> +>>>>> +> +>>>>> I suspect that, as cpr-transfer doesn't migrate the guest memory, but +> +>>>>> rather passes it through the memory backend object, our code might +> +>>>>> somehow corrupt the VRAM.  However, I wasn't able to trace the +> +>>>>> corruption so far. +> +>>>>> +> +>>>>> Could somebody help the investigation and take a look into this?  Any +> +>>>>> suggestions would be appreciated.  Thanks! +> +>>>> +> +>>>> Possibly some memory region created by qxl is not being preserved. +> +>>>> Try adding these traces to see what is preserved: +> +>>>> +> +>>>> -trace enable='*cpr*' +> +>>>> -trace enable='*ram_alloc*' +> +>>> +> +>>> Also try adding this patch to see if it flags any ram blocks as not +> +>>> compatible with cpr.  A message is printed at migration start time. 
+> +>>>   +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +> +>>> email- +> +>>> steven.sistare@oracle.com/ +> +>>> +> +>>> - Steve +> +>>> +> +>> +> +>> With the traces enabled + the "migration: ram block cpr blockers" patch +> +>> applied: +> +>> +> +>> Source: +> +>>> cpr_find_fd pc.bios, id 0 returns -1 +> +>>> cpr_save_fd pc.bios, id 0, fd 22 +> +>>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +> +>>> 0x7fec18e00000 +> +>>> cpr_find_fd pc.rom, id 0 returns -1 +> +>>> cpr_save_fd pc.rom, id 0, fd 23 +> +>>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +> +>>> 0x7fec18c00000 +> +>>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +> +>>> cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +> +>>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +> +>>> 262144 fd 24 host 0x7fec18a00000 +> +>>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +> +>>> cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +> +>>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +> +>>> 67108864 fd 25 host 0x7feb77e00000 +> +>>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +> +>>> cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +> +>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +> +>>> fd 27 host 0x7fec18800000 +> +>>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +> +>>> cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +> +>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +> +>>> 67108864 fd 28 host 0x7feb73c00000 +> +>>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +> +>>> cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +> +>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +> +>>> fd 34 host 0x7fec18600000 +> +>>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +> +>>> cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +> +>>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +> +>>> 2097152 fd 35 host 0x7fec18200000 +> +>>> cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +> +>>> cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +> +>>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +> +>>> fd 36 host 0x7feb8b600000 +> +>>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +> +>>> cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +> +>>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +> +>>> 37 host 0x7feb8b400000 +> +>>> +> +>>> cpr_state_save cpr-transfer mode +> +>>> cpr_transfer_output /var/run/alma8cpr-dst.sock +> +>> +> +>> Target: +> +>>> cpr_transfer_input /var/run/alma8cpr-dst.sock +> +>>> cpr_state_load cpr-transfer mode +> +>>> cpr_find_fd pc.bios, id 0 returns 20 +> +>>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +> +>>> 0x7fcdc9800000 +> +>>> cpr_find_fd pc.rom, id 0 returns 19 +> +>>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +> +>>> 0x7fcdc9600000 +> +>>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +> +>>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +> +>>> 262144 fd 18 host 0x7fcdc9400000 +> +>>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +> +>>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +> +>>> 67108864 fd 17 host 0x7fcd27e00000 +> +>>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +> +>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +> +>>> fd 16 host 0x7fcdc9200000 +> +>>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 
+> +>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +> +>>> 67108864 fd 15 host 0x7fcd23c00000 +> +>>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +> +>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +> +>>> fd 14 host 0x7fcdc8800000 +> +>>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +> +>>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +> +>>> 2097152 fd 13 host 0x7fcdc8400000 +> +>>> cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +> +>>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +> +>>> fd 11 host 0x7fcdc8200000 +> +>>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +> +>>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +> +>>> 10 host 0x7fcd3be00000 +> +>> +> +>> Looks like both vga.vram and qxl.vram are being preserved (with the same +> +>> addresses), and no incompatible ram blocks are found during migration. +> +> +> +> Sorry, addressed are not the same, of course.  However corresponding ram +> +> blocks do seem to be preserved and initialized. +> +> +So far, I have not reproduced the guest driver failure. +> +> +However, I have isolated places where new QEMU improperly writes to +> +the qxl memory regions prior to starting the guest, by mmap'ing them +> +readonly after cpr: +> +> +  qemu_ram_alloc_internal() +> +    if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) +> +        ram_flags |= RAM_READONLY; +> +    new_block = qemu_ram_alloc_from_fd(...) +> +> +I have attached a draft fix; try it and let me know. +> +My console window looks fine before and after cpr, using +> +-vnc $hostip:0 -vga qxl +> +> +- Steve +Regarding the reproduce: when I launch the buggy version with the same +options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, +my VNC client silently hangs on the target after a while. Could it +happen on your stand as well? Could you try launching VM with +"-nographic -device qxl-vga"? That way VM's serial console is given you +directly in the shell, so when qxl driver crashes you're still able to +inspect the kernel messages. + +As for your patch, I can report that it doesn't resolve the issue as it +is. But I was able to track down another possible memory corruption +using your approach with readonly mmap'ing: + +> +Program terminated with signal SIGSEGV, Segmentation fault. 
+> +#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +> +412 d->ram->magic = cpu_to_le32(QXL_RAM_MAGIC); +> +[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +> +(gdb) bt +> +#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +> +#1 0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +> +errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +> +#2 0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +> +errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +> +#3 0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +> +errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +> +#4 0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, +> +value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +> +#5 0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, +> +v=0x5638996f3770, name=0x56389759b141 "realized", opaque=0x5638987893d0, +> +errp=0x7ffd3c2b84e0) +> +at ../qom/object.c:2374 +> +#6 0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, +> +name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) +> +at ../qom/object.c:1449 +> +#7 0x00005638970f8586 in object_property_set_qobject (obj=0x5638996e0e70, +> +name=0x56389759b141 "realized", value=0x5638996df900, errp=0x7ffd3c2b84e0) +> +at ../qom/qom-qobject.c:28 +> +#8 0x00005638970f3d8d in object_property_set_bool (obj=0x5638996e0e70, +> +name=0x56389759b141 "realized", value=true, errp=0x7ffd3c2b84e0) +> +at ../qom/object.c:1519 +> +#9 0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, +> +bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +> +#10 0x0000563896dba675 in qdev_device_add_from_qdict (opts=0x5638996dfe50, +> +from_json=false, errp=0x7ffd3c2b84e0) at ../system/qdev-monitor.c:714 +> +#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, +> +errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 +> +#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, opts=0x563898786150, +> +errp=0x56389855dc40 ) at ../system/vl.c:1207 +> +#13 0x000056389737a6cc in qemu_opts_foreach +> +(list=0x563898427b60 , func=0x563896dc48ca +> +, opaque=0x0, errp=0x56389855dc40 ) +> +at ../util/qemu-option.c:1135 +> +#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/vl.c:2745 +> +#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +> +) at ../system/vl.c:2806 +> +#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) at +> +../system/vl.c:3838 +> +#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at +> +../system/main.c:72 +So the attached adjusted version of your patch does seem to help. At +least I can't reproduce the crash on my stand. + +I'm wondering, could it be useful to explicitly mark all the reused +memory regions readonly upon cpr-transfer, and then make them writable +back again after the migration is done? That way we will be segfaulting +early on instead of debugging tricky memory corruptions. 
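A minimal standalone sketch of that read-only/writable-again idea, using plain POSIX mprotect(2) on an anonymous mapping as a stand-in for a preserved RAM block; the helper names are illustrative and this is not taken from any patch in this thread:

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* Drop write access on a preserved region so stray writes fault early.
 * base/len must be page-aligned, as mprotect(2) requires. */
static int cpr_debug_set_readonly(void *base, size_t len)
{
    return mprotect(base, len, PROT_READ);
}

/* Restore write access once the incoming migration has completed. */
static int cpr_debug_set_writable(void *base, size_t len)
{
    return mprotect(base, len, PROT_READ | PROT_WRITE);
}

int main(void)
{
    size_t len = (size_t)sysconf(_SC_PAGESIZE);
    /* Stand-in for a preserved RAM block's host mapping. */
    char *blk = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (blk == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    blk[0] = 42;                        /* allowed: still writable          */
    cpr_debug_set_readonly(blk, len);   /* cpr started: lock the block down */
    /* blk[1] = 43; would SIGSEGV here, pointing at the offending writer    */
    cpr_debug_set_writable(blk, len);   /* migration done: unlock           */
    blk[1] = 43;                        /* allowed again                    */

    printf("blk[0]=%d blk[1]=%d\n", blk[0], blk[1]);
    munmap(blk, len);
    return 0;
}

In QEMU itself the same protection would have to be applied to each preserved RAM block's host mapping, which is where the TLB-shootdown cost discussed later in the thread comes from.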
+ +Andrey +0001-hw-qxl-cpr-support-preliminary.patch +Description: +Text Data + +On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: +On 3/4/25 9:05 PM, Steven Sistare wrote: +On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +On 2/28/25 8:20 PM, Steven Sistare wrote: +On 2/28/2025 1:13 PM, Steven Sistare wrote: +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently +and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ +       -machine q35 \ +       -cpu host -smp 2 -m 2G \ +       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +ram0,share=on\ +       -machine memory-backend=ram0 \ +       -machine aux-ram-share=on \ +       -drive file=$ROOTFS,media=disk,if=virtio \ +       -qmp unix:$QMPSOCK,server=on,wait=off \ +       -nographic \ +       -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +       -machine q35 \ +       -cpu host -smp 2 -m 2G \ +       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +ram0,share=on\ +       -machine memory-backend=ram0 \ +       -machine aux-ram-share=on \ +       -drive file=$ROOTFS,media=disk,if=virtio \ +       -qmp unix:$QMPSOCK,server=on,wait=off \ +       -nographic \ +       -device qxl-vga \ +       -incoming tcp:0:44444 \ +       -incoming '{"channel-type": "cpr", "addr": { "transport": +"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 +           done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce +that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't +lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM.  However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into this?  Any +suggestions would be appreciated.  Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr.  A message is printed at migration start time. 
+   +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +email- +steven.sistare@oracle.com/ + +- Steve +With the traces enabled + the "migration: ram block cpr blockers" patch +applied: + +Source: +cpr_find_fd pc.bios, id 0 returns -1 +cpr_save_fd pc.bios, id 0, fd 22 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +0x7fec18e00000 +cpr_find_fd pc.rom, id 0 returns -1 +cpr_save_fd pc.rom, id 0, fd 23 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +0x7fec18c00000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 24 host 0x7fec18a00000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 25 host 0x7feb77e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 27 host 0x7fec18800000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 28 host 0x7feb73c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 34 host 0x7fec18600000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 35 host 0x7fec18200000 +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +fd 36 host 0x7feb8b600000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +37 host 0x7feb8b400000 + +cpr_state_save cpr-transfer mode +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +cpr_transfer_input /var/run/alma8cpr-dst.sock +cpr_state_load cpr-transfer mode +cpr_find_fd pc.bios, id 0 returns 20 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +0x7fcdc9800000 +cpr_find_fd pc.rom, id 0 returns 19 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +0x7fcdc9600000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 18 host 0x7fcdc9400000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 17 host 0x7fcd27e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 16 host 0x7fcdc9200000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 15 host 0x7fcd23c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 14 host 0x7fcdc8800000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 13 host 0x7fcdc8400000 +cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 +fd 11 host 0x7fcdc8200000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +10 host 0x7fcd3be00000 +Looks like both vga.vram and qxl.vram are being preserved (with the same +addresses), and no incompatible ram blocks are found during migration. +Sorry, addressed are not the same, of course.  However corresponding ram +blocks do seem to be preserved and initialized. +So far, I have not reproduced the guest driver failure. + +However, I have isolated places where new QEMU improperly writes to +the qxl memory regions prior to starting the guest, by mmap'ing them +readonly after cpr: + +   qemu_ram_alloc_internal() +     if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) +         ram_flags |= RAM_READONLY; +     new_block = qemu_ram_alloc_from_fd(...) + +I have attached a draft fix; try it and let me know. +My console window looks fine before and after cpr, using +-vnc $hostip:0 -vga qxl + +- Steve +Regarding the reproduce: when I launch the buggy version with the same +options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, +my VNC client silently hangs on the target after a while. Could it +happen on your stand as well? +cpr does not preserve the vnc connection and session. To test, I specify +port 0 for the source VM and port 1 for the dest. When the src vnc goes +dormant the dest vnc becomes active. +Could you try launching VM with +"-nographic -device qxl-vga"? That way VM's serial console is given you +directly in the shell, so when qxl driver crashes you're still able to +inspect the kernel messages. +I have been running like that, but have not reproduced the qxl driver crash, +and I suspect my guest image+kernel is too old. However, once I realized the +issue was post-cpr modification of qxl memory, I switched my attention to the +fix. +As for your patch, I can report that it doesn't resolve the issue as it +is. But I was able to track down another possible memory corruption +using your approach with readonly mmap'ing: +Program terminated with signal SIGSEGV, Segmentation fault. 
+#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +412 d->ram->magic = cpu_to_le32(QXL_RAM_MAGIC); +[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +(gdb) bt +#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +#1 0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +#2 0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +#3 0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +#4 0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, value=true, +errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +#5 0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, v=0x5638996f3770, +name=0x56389759b141 "realized", opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) + at ../qom/object.c:2374 +#6 0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, name=0x56389759b141 +"realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) + at ../qom/object.c:1449 +#7 0x00005638970f8586 in object_property_set_qobject (obj=0x5638996e0e70, +name=0x56389759b141 "realized", value=0x5638996df900, errp=0x7ffd3c2b84e0) + at ../qom/qom-qobject.c:28 +#8 0x00005638970f3d8d in object_property_set_bool (obj=0x5638996e0e70, +name=0x56389759b141 "realized", value=true, errp=0x7ffd3c2b84e0) + at ../qom/object.c:1519 +#9 0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, bus=0x563898cf3c20, +errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +#10 0x0000563896dba675 in qdev_device_add_from_qdict (opts=0x5638996dfe50, +from_json=false, errp=0x7ffd3c2b84e0) at ../system/qdev-monitor.c:714 +#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, errp=0x56389855dc40 +) at ../system/qdev-monitor.c:733 +#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, opts=0x563898786150, +errp=0x56389855dc40 ) at ../system/vl.c:1207 +#13 0x000056389737a6cc in qemu_opts_foreach + (list=0x563898427b60 , func=0x563896dc48ca , +opaque=0x0, errp=0x56389855dc40 ) + at ../util/qemu-option.c:1135 +#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/vl.c:2745 +#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +) at ../system/vl.c:2806 +#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) at +../system/vl.c:3838 +#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at +../system/main.c:72 +So the attached adjusted version of your patch does seem to help. At +least I can't reproduce the crash on my stand. +Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram are +definitely harmful. Try V2 of the patch, attached, which skips the lines +of init_qxl_ram that modify guest memory. +I'm wondering, could it be useful to explicitly mark all the reused +memory regions readonly upon cpr-transfer, and then make them writable +back again after the migration is done? That way we will be segfaulting +early on instead of debugging tricky memory corruptions. +It's a useful debugging technique, but changing protection on a large memory +region +can be too expensive for production due to TLB shootdowns. + +Also, there are cases where writes are performed but the value is guaranteed to +be the same: + qxl_post_load() + qxl_set_mode() + d->rom->mode = cpu_to_le32(modenr); +The value is the same because mode and shadow_rom.mode were passed in vmstate +from old qemu. 
+ +- Steve +0001-hw-qxl-cpr-support-preliminary-V2.patch +Description: +Text document + +On 3/5/25 22:19, Steven Sistare wrote: +On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: +On 3/4/25 9:05 PM, Steven Sistare wrote: +On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +On 2/28/25 8:20 PM, Steven Sistare wrote: +On 2/28/2025 1:13 PM, Steven Sistare wrote: +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently +and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ +       -machine q35 \ +       -cpu host -smp 2 -m 2G \ +       -object +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +ram0,share=on\ +       -machine memory-backend=ram0 \ +       -machine aux-ram-share=on \ +       -drive file=$ROOTFS,media=disk,if=virtio \ +       -qmp unix:$QMPSOCK,server=on,wait=off \ +       -nographic \ +       -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +       -machine q35 \ +       -cpu host -smp 2 -m 2G \ +       -object +memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ +ram0,share=on\ +       -machine memory-backend=ram0 \ +       -machine aux-ram-share=on \ +       -drive file=$ROOTFS,media=disk,if=virtio \ +       -qmp unix:$QMPSOCK,server=on,wait=off \ +       -nographic \ +       -device qxl-vga \ +       -incoming tcp:0:44444 \ +       -incoming '{"channel-type": "cpr", "addr": { "transport": +"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 +           done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce +that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the +crash - +without the cpr-transfer migration the above reproduce doesn't +lead to +crash on the source VM. +I suspect that, as cpr-transfer doesn't migrate the guest +memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM.  However, I wasn't able to trace the +corruption so far. +Could somebody help the investigation and take a look into +this?  Any +suggestions would be appreciated.  Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr.  A message is printed at migration start time. 
+https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +email- +steven.sistare@oracle.com/ + +- Steve +With the traces enabled + the "migration: ram block cpr blockers" +patch +applied: + +Source: +cpr_find_fd pc.bios, id 0 returns -1 +cpr_save_fd pc.bios, id 0, fd 22 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +0x7fec18e00000 +cpr_find_fd pc.rom, id 0 returns -1 +cpr_save_fd pc.rom, id 0, fd 23 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +0x7fec18c00000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 24 host 0x7fec18a00000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 25 host 0x7feb77e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 27 host 0x7fec18800000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 28 host 0x7feb73c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 34 host 0x7fec18600000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 35 host 0x7fec18200000 +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +fd 36 host 0x7feb8b600000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +37 host 0x7feb8b400000 + +cpr_state_save cpr-transfer mode +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +cpr_transfer_input /var/run/alma8cpr-dst.sock +cpr_state_load cpr-transfer mode +cpr_find_fd pc.bios, id 0 returns 20 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +0x7fcdc9800000 +cpr_find_fd pc.rom, id 0 returns 19 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +0x7fcdc9600000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 18 host 0x7fcdc9400000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 17 host 0x7fcd27e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 16 host 0x7fcdc9200000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 15 host 0x7fcd23c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 14 host 0x7fcdc8800000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 13 host 0x7fcdc8400000 +cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 +fd 11 host 0x7fcdc8200000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +10 host 0x7fcd3be00000 +Looks like both vga.vram and qxl.vram are being preserved (with +the same +addresses), and no incompatible ram blocks are found during +migration. +Sorry, addressed are not the same, of course.  However +corresponding ram +blocks do seem to be preserved and initialized. +So far, I have not reproduced the guest driver failure. + +However, I have isolated places where new QEMU improperly writes to +the qxl memory regions prior to starting the guest, by mmap'ing them +readonly after cpr: + +   qemu_ram_alloc_internal() +     if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) +         ram_flags |= RAM_READONLY; +     new_block = qemu_ram_alloc_from_fd(...) + +I have attached a draft fix; try it and let me know. +My console window looks fine before and after cpr, using +-vnc $hostip:0 -vga qxl + +- Steve +Regarding the reproduce: when I launch the buggy version with the same +options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, +my VNC client silently hangs on the target after a while.  Could it +happen on your stand as well? +cpr does not preserve the vnc connection and session.  To test, I specify +port 0 for the source VM and port 1 for the dest.  When the src vnc goes +dormant the dest vnc becomes active. +Could you try launching VM with +"-nographic -device qxl-vga"?  That way VM's serial console is given you +directly in the shell, so when qxl driver crashes you're still able to +inspect the kernel messages. +I have been running like that, but have not reproduced the qxl driver +crash, +and I suspect my guest image+kernel is too old.  However, once I +realized the +issue was post-cpr modification of qxl memory, I switched my attention +to the +fix. +As for your patch, I can report that it doesn't resolve the issue as it +is.  But I was able to track down another possible memory corruption +using your approach with readonly mmap'ing: +Program terminated with signal SIGSEGV, Segmentation fault. 
+#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); +[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +(gdb) bt +#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, +value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, +v=0x5638996f3770, name=0x56389759b141 "realized", +opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) +     at ../qom/object.c:2374 +#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, +name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) +     at ../qom/object.c:1449 +#7  0x00005638970f8586 in object_property_set_qobject +(obj=0x5638996e0e70, name=0x56389759b141 "realized", +value=0x5638996df900, errp=0x7ffd3c2b84e0) +     at ../qom/qom-qobject.c:28 +#8  0x00005638970f3d8d in object_property_set_bool +(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, +errp=0x7ffd3c2b84e0) +     at ../qom/object.c:1519 +#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, +bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +#10 0x0000563896dba675 in qdev_device_add_from_qdict +(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at +../system/qdev-monitor.c:714 +#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, +errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 +#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, +opts=0x563898786150, errp=0x56389855dc40 ) at +../system/vl.c:1207 +#13 0x000056389737a6cc in qemu_opts_foreach +     (list=0x563898427b60 , func=0x563896dc48ca +, opaque=0x0, errp=0x56389855dc40 ) +     at ../util/qemu-option.c:1135 +#14 0x0000563896dc89b5 in qemu_create_cli_devices () at +../system/vl.c:2745 +#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +) at ../system/vl.c:2806 +#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) +at ../system/vl.c:3838 +#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at +../system/main.c:72 +So the attached adjusted version of your patch does seem to help.  At +least I can't reproduce the crash on my stand. +Thanks for the stack trace; the calls to SPICE_RING_INIT in +init_qxl_ram are +definitely harmful.  Try V2 of the patch, attached, which skips the lines +of init_qxl_ram that modify guest memory. +I'm wondering, could it be useful to explicitly mark all the reused +memory regions readonly upon cpr-transfer, and then make them writable +back again after the migration is done?  That way we will be segfaulting +early on instead of debugging tricky memory corruptions. +It's a useful debugging technique, but changing protection on a large +memory region +can be too expensive for production due to TLB shootdowns. +Good point. Though we could move this code under non-default option to +avoid re-writing. 
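A sketch of what such a non-default gate might look like, assuming for illustration a hypothetical QEMU_CPR_RO_DEBUG environment variable (a real implementation would more likely use a migration capability or machine property); none of this comes from the patches in this thread:

#define _DEFAULT_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/* Hypothetical opt-in knob for the read-only debug mapping. */
static bool cpr_ro_debug_enabled(void)
{
    const char *v = getenv("QEMU_CPR_RO_DEBUG");
    return v != NULL && strcmp(v, "0") != 0;
}

/* Apply the protection only when the knob is set, so default runs
 * avoid the extra mprotect work and TLB shootdowns entirely. */
static int cpr_maybe_protect(void *base, size_t len)
{
    if (!cpr_ro_debug_enabled()) {
        return 0;              /* production default: leave mapping as-is */
    }
    return mprotect(base, len, PROT_READ);
}

int main(void)
{
    size_t len = (size_t)sysconf(_SC_PAGESIZE);
    void *blk = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);

    if (blk == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    if (cpr_maybe_protect(blk, len) != 0) {
        perror("mprotect");
        return 1;
    }
    printf("read-only debug protection %s\n",
           cpr_ro_debug_enabled() ? "applied" : "skipped");
    munmap(blk, len);
    return 0;
}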
+ +Den + +On 3/5/25 11:19 PM, Steven Sistare wrote: +> +On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: +> +> On 3/4/25 9:05 PM, Steven Sistare wrote: +> +>> On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +> +>>> On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +> +>>>> On 2/28/25 8:20 PM, Steven Sistare wrote: +> +>>>>> On 2/28/2025 1:13 PM, Steven Sistare wrote: +> +>>>>>> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +> +>>>>>>> Hi all, +> +>>>>>>> +> +>>>>>>> We've been experimenting with cpr-transfer migration mode recently +> +>>>>>>> and +> +>>>>>>> have discovered the following issue with the guest QXL driver: +> +>>>>>>> +> +>>>>>>> Run migration source: +> +>>>>>>>> EMULATOR=/path/to/emulator +> +>>>>>>>> ROOTFS=/path/to/image +> +>>>>>>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>>>>>>> +> +>>>>>>>> $EMULATOR -enable-kvm \ +> +>>>>>>>>        -machine q35 \ +> +>>>>>>>>        -cpu host -smp 2 -m 2G \ +> +>>>>>>>>        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +> +>>>>>>>> dev/shm/ +> +>>>>>>>> ram0,share=on\ +> +>>>>>>>>        -machine memory-backend=ram0 \ +> +>>>>>>>>        -machine aux-ram-share=on \ +> +>>>>>>>>        -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>>>>>>        -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>>>>>>        -nographic \ +> +>>>>>>>>        -device qxl-vga +> +>>>>>>> +> +>>>>>>> Run migration target: +> +>>>>>>>> EMULATOR=/path/to/emulator +> +>>>>>>>> ROOTFS=/path/to/image +> +>>>>>>>> QMPSOCK=/var/run/alma8qmp-dst.sock +> +>>>>>>>> $EMULATOR -enable-kvm \ +> +>>>>>>>>        -machine q35 \ +> +>>>>>>>>        -cpu host -smp 2 -m 2G \ +> +>>>>>>>>        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +> +>>>>>>>> dev/shm/ +> +>>>>>>>> ram0,share=on\ +> +>>>>>>>>        -machine memory-backend=ram0 \ +> +>>>>>>>>        -machine aux-ram-share=on \ +> +>>>>>>>>        -drive file=$ROOTFS,media=disk,if=virtio \ +> +>>>>>>>>        -qmp unix:$QMPSOCK,server=on,wait=off \ +> +>>>>>>>>        -nographic \ +> +>>>>>>>>        -device qxl-vga \ +> +>>>>>>>>        -incoming tcp:0:44444 \ +> +>>>>>>>>        -incoming '{"channel-type": "cpr", "addr": { "transport": +> +>>>>>>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +> +>>>>>>> +> +>>>>>>> +> +>>>>>>> Launch the migration: +> +>>>>>>>> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +> +>>>>>>>> QMPSOCK=/var/run/alma8qmp-src.sock +> +>>>>>>>> +> +>>>>>>>> $QMPSHELL -p $QMPSOCK < +>>>>>>>>        migrate-set-parameters mode=cpr-transfer +> +>>>>>>>>        migrate channels=[{"channel-type":"main","addr": +> +>>>>>>>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, +> +>>>>>>>> {"channel-type":"cpr","addr": +> +>>>>>>>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- +> +>>>>>>>> dst.sock"}}] +> +>>>>>>>> EOF +> +>>>>>>> +> +>>>>>>> Then, after a while, QXL guest driver on target crashes spewing the +> +>>>>>>> following messages: +> +>>>>>>>> [   73.962002] [TTM] Buffer eviction failed +> +>>>>>>>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, +> +>>>>>>>> 0x00000001) +> +>>>>>>>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to +> +>>>>>>>> allocate VRAM BO +> +>>>>>>> +> +>>>>>>> That seems to be a known kernel QXL driver bug: +> +>>>>>>> +> +>>>>>>> +https://lore.kernel.org/all/20220907094423.93581-1- +> +>>>>>>> min_halo@163.com/T/ +> +>>>>>>> +https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ +> +>>>>>>> +> +>>>>>>> (the latter discussion contains that reproduce script 
which +> +>>>>>>> speeds up +> +>>>>>>> the crash in the guest): +> +>>>>>>>> #!/bin/bash +> +>>>>>>>> +> +>>>>>>>> chvt 3 +> +>>>>>>>> +> +>>>>>>>> for j in $(seq 80); do +> +>>>>>>>>            echo "$(date) starting round $j" +> +>>>>>>>>            if [ "$(journalctl --boot | grep "failed to allocate +> +>>>>>>>> VRAM +> +>>>>>>>> BO")" != "" ]; then +> +>>>>>>>>                    echo "bug was reproduced after $j tries" +> +>>>>>>>>                    exit 1 +> +>>>>>>>>            fi +> +>>>>>>>>            for i in $(seq 100); do +> +>>>>>>>>                    dmesg > /dev/tty3 +> +>>>>>>>>            done +> +>>>>>>>> done +> +>>>>>>>> +> +>>>>>>>> echo "bug could not be reproduced" +> +>>>>>>>> exit 0 +> +>>>>>>> +> +>>>>>>> The bug itself seems to remain unfixed, as I was able to reproduce +> +>>>>>>> that +> +>>>>>>> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +> +>>>>>>> cpr-transfer code also seems to be buggy as it triggers the crash - +> +>>>>>>> without the cpr-transfer migration the above reproduce doesn't +> +>>>>>>> lead to +> +>>>>>>> crash on the source VM. +> +>>>>>>> +> +>>>>>>> I suspect that, as cpr-transfer doesn't migrate the guest +> +>>>>>>> memory, but +> +>>>>>>> rather passes it through the memory backend object, our code might +> +>>>>>>> somehow corrupt the VRAM.  However, I wasn't able to trace the +> +>>>>>>> corruption so far. +> +>>>>>>> +> +>>>>>>> Could somebody help the investigation and take a look into +> +>>>>>>> this?  Any +> +>>>>>>> suggestions would be appreciated.  Thanks! +> +>>>>>> +> +>>>>>> Possibly some memory region created by qxl is not being preserved. +> +>>>>>> Try adding these traces to see what is preserved: +> +>>>>>> +> +>>>>>> -trace enable='*cpr*' +> +>>>>>> -trace enable='*ram_alloc*' +> +>>>>> +> +>>>>> Also try adding this patch to see if it flags any ram blocks as not +> +>>>>> compatible with cpr.  A message is printed at migration start time. 
+> +>>>>>    +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +> +>>>>> email- +> +>>>>> steven.sistare@oracle.com/ +> +>>>>> +> +>>>>> - Steve +> +>>>>> +> +>>>> +> +>>>> With the traces enabled + the "migration: ram block cpr blockers" +> +>>>> patch +> +>>>> applied: +> +>>>> +> +>>>> Source: +> +>>>>> cpr_find_fd pc.bios, id 0 returns -1 +> +>>>>> cpr_save_fd pc.bios, id 0, fd 22 +> +>>>>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +> +>>>>> 0x7fec18e00000 +> +>>>>> cpr_find_fd pc.rom, id 0 returns -1 +> +>>>>> cpr_save_fd pc.rom, id 0, fd 23 +> +>>>>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +> +>>>>> 0x7fec18c00000 +> +>>>>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +> +>>>>> cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +> +>>>>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +> +>>>>> 262144 fd 24 host 0x7fec18a00000 +> +>>>>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +> +>>>>> cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +> +>>>>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +> +>>>>> 67108864 fd 25 host 0x7feb77e00000 +> +>>>>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +> +>>>>> cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +> +>>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +> +>>>>> fd 27 host 0x7fec18800000 +> +>>>>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +> +>>>>> cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +> +>>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +> +>>>>> 67108864 fd 28 host 0x7feb73c00000 +> +>>>>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +> +>>>>> cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +> +>>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +> +>>>>> fd 34 host 0x7fec18600000 +> +>>>>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +> +>>>>> cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +> +>>>>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +> +>>>>> 2097152 fd 35 host 0x7fec18200000 +> +>>>>> cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +> +>>>>> cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +> +>>>>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +> +>>>>> fd 36 host 0x7feb8b600000 +> +>>>>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +> +>>>>> cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +> +>>>>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +> +>>>>> 37 host 0x7feb8b400000 +> +>>>>> +> +>>>>> cpr_state_save cpr-transfer mode +> +>>>>> cpr_transfer_output /var/run/alma8cpr-dst.sock +> +>>>> +> +>>>> Target: +> +>>>>> cpr_transfer_input /var/run/alma8cpr-dst.sock +> +>>>>> cpr_state_load cpr-transfer mode +> +>>>>> cpr_find_fd pc.bios, id 0 returns 20 +> +>>>>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +> +>>>>> 0x7fcdc9800000 +> +>>>>> cpr_find_fd pc.rom, id 0 returns 19 +> +>>>>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +> +>>>>> 0x7fcdc9600000 +> +>>>>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +> +>>>>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +> +>>>>> 262144 fd 18 host 0x7fcdc9400000 +> +>>>>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +> +>>>>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +> +>>>>> 67108864 fd 17 host 0x7fcd27e00000 +> +>>>>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +> +>>>>> 
qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +> +>>>>> fd 16 host 0x7fcdc9200000 +> +>>>>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +> +>>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +> +>>>>> 67108864 fd 15 host 0x7fcd23c00000 +> +>>>>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +> +>>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +> +>>>>> fd 14 host 0x7fcdc8800000 +> +>>>>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +> +>>>>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +> +>>>>> 2097152 fd 13 host 0x7fcdc8400000 +> +>>>>> cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +> +>>>>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +> +>>>>> fd 11 host 0x7fcdc8200000 +> +>>>>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +> +>>>>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +> +>>>>> 10 host 0x7fcd3be00000 +> +>>>> +> +>>>> Looks like both vga.vram and qxl.vram are being preserved (with the +> +>>>> same +> +>>>> addresses), and no incompatible ram blocks are found during migration. +> +>>> +> +>>> Sorry, addressed are not the same, of course.  However corresponding +> +>>> ram +> +>>> blocks do seem to be preserved and initialized. +> +>> +> +>> So far, I have not reproduced the guest driver failure. +> +>> +> +>> However, I have isolated places where new QEMU improperly writes to +> +>> the qxl memory regions prior to starting the guest, by mmap'ing them +> +>> readonly after cpr: +> +>> +> +>>    qemu_ram_alloc_internal() +> +>>      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) +> +>>          ram_flags |= RAM_READONLY; +> +>>      new_block = qemu_ram_alloc_from_fd(...) +> +>> +> +>> I have attached a draft fix; try it and let me know. +> +>> My console window looks fine before and after cpr, using +> +>> -vnc $hostip:0 -vga qxl +> +>> +> +>> - Steve +> +> +> +> Regarding the reproduce: when I launch the buggy version with the same +> +> options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, +> +> my VNC client silently hangs on the target after a while.  Could it +> +> happen on your stand as well? +> +> +cpr does not preserve the vnc connection and session.  To test, I specify +> +port 0 for the source VM and port 1 for the dest.  When the src vnc goes +> +dormant the dest vnc becomes active. +> +Sure, I meant that VNC on the dest (on the port 1) works for a while +after the migration and then hangs, apparently after the guest QXL crash. + +> +> Could you try launching VM with +> +> "-nographic -device qxl-vga"?  That way VM's serial console is given you +> +> directly in the shell, so when qxl driver crashes you're still able to +> +> inspect the kernel messages. +> +> +I have been running like that, but have not reproduced the qxl driver +> +crash, +> +and I suspect my guest image+kernel is too old. +Yes, that's probably the case. But the crash occurs on my Fedora 41 +guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to +be buggy. + + +> +However, once I realized the +> +issue was post-cpr modification of qxl memory, I switched my attention +> +to the +> +fix. +> +> +> As for your patch, I can report that it doesn't resolve the issue as it +> +> is.  But I was able to track down another possible memory corruption +> +> using your approach with readonly mmap'ing: +> +> +> +>> Program terminated with signal SIGSEGV, Segmentation fault. 
+> +>> #0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +> +>> 412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); +> +>> [Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +> +>> (gdb) bt +> +>> #0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +> +>> #1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +> +>> errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +> +>> #2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +> +>> errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +> +>> #3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +> +>> errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +> +>> #4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, +> +>> value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +> +>> #5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, +> +>> v=0x5638996f3770, name=0x56389759b141 "realized", +> +>> opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) +> +>>      at ../qom/object.c:2374 +> +>> #6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, +> +>> name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) +> +>>      at ../qom/object.c:1449 +> +>> #7  0x00005638970f8586 in object_property_set_qobject +> +>> (obj=0x5638996e0e70, name=0x56389759b141 "realized", +> +>> value=0x5638996df900, errp=0x7ffd3c2b84e0) +> +>>      at ../qom/qom-qobject.c:28 +> +>> #8  0x00005638970f3d8d in object_property_set_bool +> +>> (obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, +> +>> errp=0x7ffd3c2b84e0) +> +>>      at ../qom/object.c:1519 +> +>> #9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, +> +>> bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +> +>> #10 0x0000563896dba675 in qdev_device_add_from_qdict +> +>> (opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ +> +>> system/qdev-monitor.c:714 +> +>> #11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, +> +>> errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 +> +>> #12 0x0000563896dc48f1 in device_init_func (opaque=0x0, +> +>> opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ +> +>> vl.c:1207 +> +>> #13 0x000056389737a6cc in qemu_opts_foreach +> +>>      (list=0x563898427b60 , func=0x563896dc48ca +> +>> , opaque=0x0, errp=0x56389855dc40 ) +> +>>      at ../util/qemu-option.c:1135 +> +>> #14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ +> +>> vl.c:2745 +> +>> #15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +> +>> ) at ../system/vl.c:2806 +> +>> #16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) +> +>> at ../system/vl.c:3838 +> +>> #17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ +> +>> system/main.c:72 +> +> +> +> So the attached adjusted version of your patch does seem to help.  At +> +> least I can't reproduce the crash on my stand. +> +> +Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram +> +are +> +definitely harmful.  Try V2 of the patch, attached, which skips the lines +> +of init_qxl_ram that modify guest memory. +> +Thanks, your v2 patch does seem to prevent the crash. Would you re-send +it to the list as a proper fix? + +> +> I'm wondering, could it be useful to explicitly mark all the reused +> +> memory regions readonly upon cpr-transfer, and then make them writable +> +> back again after the migration is done?  That way we will be segfaulting +> +> early on instead of debugging tricky memory corruptions. 
+> +> +It's a useful debugging technique, but changing protection on a large +> +memory region +> +can be too expensive for production due to TLB shootdowns. +> +> +Also, there are cases where writes are performed but the value is +> +guaranteed to +> +be the same: +> +  qxl_post_load() +> +    qxl_set_mode() +> +      d->rom->mode = cpu_to_le32(modenr); +> +The value is the same because mode and shadow_rom.mode were passed in +> +vmstate +> +from old qemu. +> +There are also cases where a device's ROM might be re-initialized. E.g. +this segfault occurs upon further exploration of RO-mapped RAM blocks: + +> +Program terminated with signal SIGSEGV, Segmentation fault. +> +#0 __memmove_avx_unaligned_erms () at +> +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +> +664 rep movsb +> +[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] +> +(gdb) bt +> +#0 __memmove_avx_unaligned_erms () at +> +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +> +#1 0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, +> +owner=0x55aa2019ac10, name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) +> +at ../hw/core/loader.c:1032 +> +#2 0x000055aa1d031577 in rom_add_blob +> +(name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, +> +max_len=2097152, addr=18446744073709551615, fw_file_name=0x55aa1da51f13 +> +"etc/acpi/tables", fw_callback=0x55aa1d441f59 , +> +callback_opaque=0x55aa20ff0010, as=0x0, read_only=true) at +> +../hw/core/loader.c:1147 +> +#3 0x000055aa1cfd788d in acpi_add_rom_blob +> +(update=0x55aa1d441f59 , opaque=0x55aa20ff0010, +> +blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at +> +../hw/acpi/utils.c:46 +> +#4 0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 +> +#5 0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) +> +at ../hw/i386/pc.c:638 +> +#6 0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 +> +, data=0x0) at ../util/notify.c:39 +> +#7 0x000055aa1d039ee5 in qdev_machine_creation_done () at +> +../hw/core/machine.c:1749 +> +#8 0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 +> +) at ../system/vl.c:2779 +> +#9 0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 +> +) at ../system/vl.c:2807 +> +#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at +> +../system/vl.c:3838 +> +#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at +> +../system/main.c:72 +I'm not sure whether the ACPI tables ROM in particular is rewritten with the +same content, but there might be cases where a ROM can be read from the file +system upon initialization. That is undesirable, as the guest kernel +certainly won't be too happy about a sudden change of the device's ROM +content. + +So the issue we're dealing with here is any unwanted memory-related +device initialization upon cpr. + +For now the only thing that comes to my mind is to make a test where we +put as many devices as we can into a VM, make ram blocks RO upon cpr +(and remap them as RW later after migration is done, if needed), and +catch any unwanted memory violations. As Den suggested, we might +consider adding that behaviour as a separate non-default option (or +"migrate" command flag specific to cpr-transfer), which would only be +used for testing.
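A minimal standalone sketch of such a fault-catching test harness, using plain POSIX signal handling rather than QEMU internals; the variable and function names are illustrative only:

#define _DEFAULT_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void  *watched_base;
static size_t watched_len;

/* Report the faulting address, then make the region writable again so the
 * offending store can complete and the test keeps running.  snprintf() is
 * not strictly async-signal-safe, but acceptable in a debug-only harness. */
static void segv_handler(int sig, siginfo_t *info, void *ctx)
{
    char msg[128];
    int n;

    (void)sig;
    (void)ctx;
    n = snprintf(msg, sizeof(msg),
                 "unexpected write to preserved memory at %p\n",
                 info->si_addr);
    if (n > 0) {
        ssize_t r = write(STDERR_FILENO, msg, (size_t)n);
        (void)r;
    }
    if (mprotect(watched_base, watched_len, PROT_READ | PROT_WRITE) != 0) {
        _exit(1);   /* avoid faulting forever if we cannot unprotect */
    }
}

int main(void)
{
    struct sigaction sa;

    watched_len  = (size_t)sysconf(_SC_PAGESIZE);
    watched_base = mmap(NULL, watched_len, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (watched_base == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = segv_handler;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, NULL);

    /* Pretend this block was preserved by cpr: lock it down... */
    mprotect(watched_base, watched_len, PROT_READ);

    /* ...then perform the kind of stray write the test is meant to flag. */
    ((volatile char *)watched_base)[0] = 1;

    puts("offending write was caught and allowed to complete");
    return 0;
}

Applied to the preserved RAM blocks of a VM loaded with many devices, each such report would point directly at the initialization path that should have been skipped after cpr.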
+ +Andrey + +On 3/6/25 16:16, Andrey Drobyshev wrote: +On 3/5/25 11:19 PM, Steven Sistare wrote: +On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: +On 3/4/25 9:05 PM, Steven Sistare wrote: +On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +On 2/28/25 8:20 PM, Steven Sistare wrote: +On 2/28/2025 1:13 PM, Steven Sistare wrote: +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently +and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ +        -machine q35 \ +        -cpu host -smp 2 -m 2G \ +        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +dev/shm/ +ram0,share=on\ +        -machine memory-backend=ram0 \ +        -machine aux-ram-share=on \ +        -drive file=$ROOTFS,media=disk,if=virtio \ +        -qmp unix:$QMPSOCK,server=on,wait=off \ +        -nographic \ +        -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +        -machine q35 \ +        -cpu host -smp 2 -m 2G \ +        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +dev/shm/ +ram0,share=on\ +        -machine memory-backend=ram0 \ +        -machine aux-ram-share=on \ +        -drive file=$ROOTFS,media=disk,if=virtio \ +        -qmp unix:$QMPSOCK,server=on,wait=off \ +        -nographic \ +        -device qxl-vga \ +        -incoming tcp:0:44444 \ +        -incoming '{"channel-type": "cpr", "addr": { "transport": +"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 +            done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce +that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't +lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest +memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM.  However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into +this?  Any +suggestions would be appreciated.  Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr.  A message is printed at migration start time. 
+    +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +email- +steven.sistare@oracle.com/ + +- Steve +With the traces enabled + the "migration: ram block cpr blockers" +patch +applied: + +Source: +cpr_find_fd pc.bios, id 0 returns -1 +cpr_save_fd pc.bios, id 0, fd 22 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +0x7fec18e00000 +cpr_find_fd pc.rom, id 0 returns -1 +cpr_save_fd pc.rom, id 0, fd 23 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +0x7fec18c00000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 24 host 0x7fec18a00000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 25 host 0x7feb77e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 27 host 0x7fec18800000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 28 host 0x7feb73c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 34 host 0x7fec18600000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 35 host 0x7fec18200000 +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +fd 36 host 0x7feb8b600000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +37 host 0x7feb8b400000 + +cpr_state_save cpr-transfer mode +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +cpr_transfer_input /var/run/alma8cpr-dst.sock +cpr_state_load cpr-transfer mode +cpr_find_fd pc.bios, id 0 returns 20 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +0x7fcdc9800000 +cpr_find_fd pc.rom, id 0 returns 19 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +0x7fcdc9600000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 18 host 0x7fcdc9400000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 17 host 0x7fcd27e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 16 host 0x7fcdc9200000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 15 host 0x7fcd23c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 14 host 0x7fcdc8800000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 13 host 0x7fcdc8400000 +cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 +fd 11 host 0x7fcdc8200000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +10 host 0x7fcd3be00000 +Looks like both vga.vram and qxl.vram are being preserved (with the +same +addresses), and no incompatible ram blocks are found during migration. +Sorry, addressed are not the same, of course.  However corresponding +ram +blocks do seem to be preserved and initialized. +So far, I have not reproduced the guest driver failure. + +However, I have isolated places where new QEMU improperly writes to +the qxl memory regions prior to starting the guest, by mmap'ing them +readonly after cpr: + +    qemu_ram_alloc_internal() +      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) +          ram_flags |= RAM_READONLY; +      new_block = qemu_ram_alloc_from_fd(...) + +I have attached a draft fix; try it and let me know. +My console window looks fine before and after cpr, using +-vnc $hostip:0 -vga qxl + +- Steve +Regarding the reproduce: when I launch the buggy version with the same +options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, +my VNC client silently hangs on the target after a while.  Could it +happen on your stand as well? +cpr does not preserve the vnc connection and session.  To test, I specify +port 0 for the source VM and port 1 for the dest.  When the src vnc goes +dormant the dest vnc becomes active. +Sure, I meant that VNC on the dest (on the port 1) works for a while +after the migration and then hangs, apparently after the guest QXL crash. +Could you try launching VM with +"-nographic -device qxl-vga"?  That way VM's serial console is given you +directly in the shell, so when qxl driver crashes you're still able to +inspect the kernel messages. +I have been running like that, but have not reproduced the qxl driver +crash, +and I suspect my guest image+kernel is too old. +Yes, that's probably the case. But the crash occurs on my Fedora 41 +guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to +be buggy. +However, once I realized the +issue was post-cpr modification of qxl memory, I switched my attention +to the +fix. +As for your patch, I can report that it doesn't resolve the issue as it +is.  But I was able to track down another possible memory corruption +using your approach with readonly mmap'ing: +Program terminated with signal SIGSEGV, Segmentation fault. 
+#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); +[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +(gdb) bt +#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, +value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, +v=0x5638996f3770, name=0x56389759b141 "realized", +opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) +      at ../qom/object.c:2374 +#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, +name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) +      at ../qom/object.c:1449 +#7  0x00005638970f8586 in object_property_set_qobject +(obj=0x5638996e0e70, name=0x56389759b141 "realized", +value=0x5638996df900, errp=0x7ffd3c2b84e0) +      at ../qom/qom-qobject.c:28 +#8  0x00005638970f3d8d in object_property_set_bool +(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, +errp=0x7ffd3c2b84e0) +      at ../qom/object.c:1519 +#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, +bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +#10 0x0000563896dba675 in qdev_device_add_from_qdict +(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ +system/qdev-monitor.c:714 +#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, +errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 +#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, +opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ +vl.c:1207 +#13 0x000056389737a6cc in qemu_opts_foreach +      (list=0x563898427b60 , func=0x563896dc48ca +, opaque=0x0, errp=0x56389855dc40 ) +      at ../util/qemu-option.c:1135 +#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ +vl.c:2745 +#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +) at ../system/vl.c:2806 +#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) +at ../system/vl.c:3838 +#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ +system/main.c:72 +So the attached adjusted version of your patch does seem to help.  At +least I can't reproduce the crash on my stand. +Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram +are +definitely harmful.  Try V2 of the patch, attached, which skips the lines +of init_qxl_ram that modify guest memory. +Thanks, your v2 patch does seem to prevent the crash. Would you re-send +it to the list as a proper fix? +I'm wondering, could it be useful to explicitly mark all the reused +memory regions readonly upon cpr-transfer, and then make them writable +back again after the migration is done?  That way we will be segfaulting +early on instead of debugging tricky memory corruptions. +It's a useful debugging technique, but changing protection on a large +memory region +can be too expensive for production due to TLB shootdowns. 
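As an aside for anyone who wants to experiment with the write-protection trick described above outside of the draft patch: the same effect can be demonstrated with plain POSIX mmap()/mprotect(). The sketch below only illustrates the debugging idea (trap stray writes to a "preserved" block, then restore write access once migration is done); it is not QEMU code, and none of the names beyond the POSIX calls are real APIs.

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    /* Illustration of the read-only trapping idea: any unexpected write to
     * a block that is supposed to be preserved faults immediately instead
     * of silently corrupting guest-visible memory. */
    int main(void)
    {
        size_t len = 4096;
        char *blk = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (blk == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        memset(blk, 0, len);                        /* legitimate initialisation */

        mprotect(blk, len, PROT_READ);              /* "reused" block: writes now fault */
        /* blk[0] = 1; */                           /* would SIGSEGV, like init_qxl_ram did */

        mprotect(blk, len, PROT_READ | PROT_WRITE); /* restore once migration is done */
        blk[0] = 1;                                 /* writes are fine again */

        munmap(blk, len);
        return 0;
    }

The TLB-shootdown concern mentioned above is exactly why this flip is attractive only as a test-time aid rather than default production behaviour.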
+ +Also, there are cases where writes are performed but the value is +guaranteed to +be the same: +   qxl_post_load() +     qxl_set_mode() +       d->rom->mode = cpu_to_le32(modenr); +The value is the same because mode and shadow_rom.mode were passed in +vmstate +from old qemu. +There're also cases where devices' ROM might be re-initialized. E.g. +this segfault occures upon further exploration of RO mapped RAM blocks: +Program terminated with signal SIGSEGV, Segmentation fault. +#0 __memmove_avx_unaligned_erms () at +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +664 rep movsb +[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] +(gdb) bt +#0 __memmove_avx_unaligned_erms () at +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +#1 0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, owner=0x55aa2019ac10, +name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) + at ../hw/core/loader.c:1032 +#2 0x000055aa1d031577 in rom_add_blob + (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, max_len=2097152, +addr=18446744073709551615, fw_file_name=0x55aa1da51f13 "etc/acpi/tables", +fw_callback=0x55aa1d441f59 , callback_opaque=0x55aa20ff0010, as=0x0, +read_only=true) at ../hw/core/loader.c:1147 +#3 0x000055aa1cfd788d in acpi_add_rom_blob + (update=0x55aa1d441f59 , opaque=0x55aa20ff0010, +blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at ../hw/acpi/utils.c:46 +#4 0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 +#5 0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) +at ../hw/i386/pc.c:638 +#6 0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 +, data=0x0) at ../util/notify.c:39 +#7 0x000055aa1d039ee5 in qdev_machine_creation_done () at +../hw/core/machine.c:1749 +#8 0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 +) at ../system/vl.c:2779 +#9 0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 +) at ../system/vl.c:2807 +#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at +../system/vl.c:3838 +#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at +../system/main.c:72 +I'm not sure whether ACPI tables ROM in particular is rewritten with the +same content, but there might be cases where ROM can be read from file +system upon initialization. That is undesirable as guest kernel +certainly won't be too happy about sudden change of the device's ROM +content. + +So the issue we're dealing with here is any unwanted memory related +device initialization upon cpr. + +For now the only thing that comes to my mind is to make a test where we +put as many devices as we can into a VM, make ram blocks RO upon cpr +(and remap them as RW later after migration is done, if needed), and +catch any unwanted memory violations. As Den suggested, we might +consider adding that behaviour as a separate non-default option (or +"migrate" command flag specific to cpr-transfer), which would only be +used in the testing. + +Andrey +No way. ACPI with the source must be used in the same way as BIOSes +and optional ROMs. + +Den + +On 3/6/2025 10:52 AM, Denis V. 
Lunev wrote: +On 3/6/25 16:16, Andrey Drobyshev wrote: +On 3/5/25 11:19 PM, Steven Sistare wrote: +On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: +On 3/4/25 9:05 PM, Steven Sistare wrote: +On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +On 2/28/25 8:20 PM, Steven Sistare wrote: +On 2/28/2025 1:13 PM, Steven Sistare wrote: +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently +and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ +        -machine q35 \ +        -cpu host -smp 2 -m 2G \ +        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +dev/shm/ +ram0,share=on\ +        -machine memory-backend=ram0 \ +        -machine aux-ram-share=on \ +        -drive file=$ROOTFS,media=disk,if=virtio \ +        -qmp unix:$QMPSOCK,server=on,wait=off \ +        -nographic \ +        -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +        -machine q35 \ +        -cpu host -smp 2 -m 2G \ +        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +dev/shm/ +ram0,share=on\ +        -machine memory-backend=ram0 \ +        -machine aux-ram-share=on \ +        -drive file=$ROOTFS,media=disk,if=virtio \ +        -qmp unix:$QMPSOCK,server=on,wait=off \ +        -nographic \ +        -device qxl-vga \ +        -incoming tcp:0:44444 \ +        -incoming '{"channel-type": "cpr", "addr": { "transport": +"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 +            done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce +that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't +lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest +memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM.  However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into +this?  Any +suggestions would be appreciated.  Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr.  A message is printed at migration start time. 
+    +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +email- +steven.sistare@oracle.com/ + +- Steve +With the traces enabled + the "migration: ram block cpr blockers" +patch +applied: + +Source: +cpr_find_fd pc.bios, id 0 returns -1 +cpr_save_fd pc.bios, id 0, fd 22 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +0x7fec18e00000 +cpr_find_fd pc.rom, id 0 returns -1 +cpr_save_fd pc.rom, id 0, fd 23 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +0x7fec18c00000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 24 host 0x7fec18a00000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 25 host 0x7feb77e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 27 host 0x7fec18800000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 28 host 0x7feb73c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 34 host 0x7fec18600000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 35 host 0x7fec18200000 +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +fd 36 host 0x7feb8b600000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +37 host 0x7feb8b400000 + +cpr_state_save cpr-transfer mode +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +cpr_transfer_input /var/run/alma8cpr-dst.sock +cpr_state_load cpr-transfer mode +cpr_find_fd pc.bios, id 0 returns 20 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +0x7fcdc9800000 +cpr_find_fd pc.rom, id 0 returns 19 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +0x7fcdc9600000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 18 host 0x7fcdc9400000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 17 host 0x7fcd27e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 16 host 0x7fcdc9200000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 15 host 0x7fcd23c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 14 host 0x7fcdc8800000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 13 host 0x7fcdc8400000 +cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 +fd 11 host 0x7fcdc8200000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +10 host 0x7fcd3be00000 +Looks like both vga.vram and qxl.vram are being preserved (with the +same +addresses), and no incompatible ram blocks are found during migration. +Sorry, addressed are not the same, of course.  However corresponding +ram +blocks do seem to be preserved and initialized. +So far, I have not reproduced the guest driver failure. + +However, I have isolated places where new QEMU improperly writes to +the qxl memory regions prior to starting the guest, by mmap'ing them +readonly after cpr: + +    qemu_ram_alloc_internal() +      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) +          ram_flags |= RAM_READONLY; +      new_block = qemu_ram_alloc_from_fd(...) + +I have attached a draft fix; try it and let me know. +My console window looks fine before and after cpr, using +-vnc $hostip:0 -vga qxl + +- Steve +Regarding the reproduce: when I launch the buggy version with the same +options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, +my VNC client silently hangs on the target after a while.  Could it +happen on your stand as well? +cpr does not preserve the vnc connection and session.  To test, I specify +port 0 for the source VM and port 1 for the dest.  When the src vnc goes +dormant the dest vnc becomes active. +Sure, I meant that VNC on the dest (on the port 1) works for a while +after the migration and then hangs, apparently after the guest QXL crash. +Could you try launching VM with +"-nographic -device qxl-vga"?  That way VM's serial console is given you +directly in the shell, so when qxl driver crashes you're still able to +inspect the kernel messages. +I have been running like that, but have not reproduced the qxl driver +crash, +and I suspect my guest image+kernel is too old. +Yes, that's probably the case.  But the crash occurs on my Fedora 41 +guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to +be buggy. +However, once I realized the +issue was post-cpr modification of qxl memory, I switched my attention +to the +fix. +As for your patch, I can report that it doesn't resolve the issue as it +is.  But I was able to track down another possible memory corruption +using your approach with readonly mmap'ing: +Program terminated with signal SIGSEGV, Segmentation fault. 
+#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); +[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +(gdb) bt +#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, +value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, +v=0x5638996f3770, name=0x56389759b141 "realized", +opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) +      at ../qom/object.c:2374 +#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, +name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) +      at ../qom/object.c:1449 +#7  0x00005638970f8586 in object_property_set_qobject +(obj=0x5638996e0e70, name=0x56389759b141 "realized", +value=0x5638996df900, errp=0x7ffd3c2b84e0) +      at ../qom/qom-qobject.c:28 +#8  0x00005638970f3d8d in object_property_set_bool +(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, +errp=0x7ffd3c2b84e0) +      at ../qom/object.c:1519 +#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, +bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +#10 0x0000563896dba675 in qdev_device_add_from_qdict +(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ +system/qdev-monitor.c:714 +#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, +errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 +#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, +opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ +vl.c:1207 +#13 0x000056389737a6cc in qemu_opts_foreach +      (list=0x563898427b60 , func=0x563896dc48ca +, opaque=0x0, errp=0x56389855dc40 ) +      at ../util/qemu-option.c:1135 +#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ +vl.c:2745 +#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +) at ../system/vl.c:2806 +#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) +at ../system/vl.c:3838 +#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ +system/main.c:72 +So the attached adjusted version of your patch does seem to help.  At +least I can't reproduce the crash on my stand. +Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram +are +definitely harmful.  Try V2 of the patch, attached, which skips the lines +of init_qxl_ram that modify guest memory. +Thanks, your v2 patch does seem to prevent the crash.  Would you re-send +it to the list as a proper fix? +Yes. Was waiting for your confirmation. +I'm wondering, could it be useful to explicitly mark all the reused +memory regions readonly upon cpr-transfer, and then make them writable +back again after the migration is done?  That way we will be segfaulting +early on instead of debugging tricky memory corruptions. +It's a useful debugging technique, but changing protection on a large +memory region +can be too expensive for production due to TLB shootdowns. 
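The v2 patch discussed above is only an attachment and is not reproduced in the archive; based on its description ("skips the lines of init_qxl_ram that modify guest memory"), the guard presumably has roughly the shape sketched below. This is a hedged, self-contained illustration rather than the actual fix: the structure layout, the magic value and the "reused" flag are placeholders, and the real code lives in hw/display/qxl.c.

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the handful of QXL RAM header fields that init_qxl_ram
     * writes; the real layout is the QXLRam structure from the SPICE/QXL
     * headers used by hw/display/qxl.c. */
    typedef struct {
        uint32_t magic;
        uint32_t int_pending;
    } qxl_ram_hdr;

    /* Sketch of the idea: when the RAM block was preserved across
     * cpr-transfer, skip the writes that would clobber ring/header state
     * the running guest driver still depends on.  "reused" is a placeholder
     * for however the real patch detects a cpr-preserved block. */
    static void init_qxl_ram_sketch(qxl_ram_hdr *ram, int reused)
    {
        if (reused) {
            return;                          /* guest memory stays untouched */
        }
        ram->magic = 0x12345678;             /* stand-in for QXL_RAM_MAGIC */
        ram->int_pending = 0;
        /* ... the SPICE_RING_INIT() calls would follow here ... */
    }

    int main(void)
    {
        qxl_ram_hdr fresh = {0};
        qxl_ram_hdr preserved = { .magic = 0x12345678, .int_pending = 7 };

        init_qxl_ram_sketch(&fresh, 0);      /* normal cold boot: initialise */
        init_qxl_ram_sketch(&preserved, 1);  /* cpr target: leave state alone */

        printf("fresh.magic=%#x preserved.int_pending=%u\n",
               (unsigned)fresh.magic, (unsigned)preserved.int_pending);
        return 0;
    }
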
+ +Also, there are cases where writes are performed but the value is +guaranteed to +be the same: +   qxl_post_load() +     qxl_set_mode() +       d->rom->mode = cpu_to_le32(modenr); +The value is the same because mode and shadow_rom.mode were passed in +vmstate +from old qemu. +There're also cases where devices' ROM might be re-initialized.  E.g. +this segfault occures upon further exploration of RO mapped RAM blocks: +Program terminated with signal SIGSEGV, Segmentation fault. +#0  __memmove_avx_unaligned_erms () at +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +664             rep     movsb +[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] +(gdb) bt +#0  __memmove_avx_unaligned_erms () at +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +#1  0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, owner=0x55aa2019ac10, +name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) +     at ../hw/core/loader.c:1032 +#2  0x000055aa1d031577 in rom_add_blob +     (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, max_len=2097152, +addr=18446744073709551615, fw_file_name=0x55aa1da51f13 "etc/acpi/tables", +fw_callback=0x55aa1d441f59 , callback_opaque=0x55aa20ff0010, as=0x0, +read_only=true) at ../hw/core/loader.c:1147 +#3  0x000055aa1cfd788d in acpi_add_rom_blob +     (update=0x55aa1d441f59 , opaque=0x55aa20ff0010, +blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at ../hw/acpi/utils.c:46 +#4  0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 +#5  0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) +at ../hw/i386/pc.c:638 +#6  0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 +, data=0x0) at ../util/notify.c:39 +#7  0x000055aa1d039ee5 in qdev_machine_creation_done () at +../hw/core/machine.c:1749 +#8  0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 +) at ../system/vl.c:2779 +#9  0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 +) at ../system/vl.c:2807 +#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at +../system/vl.c:3838 +#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at +../system/main.c:72 +I'm not sure whether ACPI tables ROM in particular is rewritten with the +same content, but there might be cases where ROM can be read from file +system upon initialization.  That is undesirable as guest kernel +certainly won't be too happy about sudden change of the device's ROM +content. + +So the issue we're dealing with here is any unwanted memory related +device initialization upon cpr. + +For now the only thing that comes to my mind is to make a test where we +put as many devices as we can into a VM, make ram blocks RO upon cpr +(and remap them as RW later after migration is done, if needed), and +catch any unwanted memory violations.  As Den suggested, we might +consider adding that behaviour as a separate non-default option (or +"migrate" command flag specific to cpr-transfer), which would only be +used in the testing. +I'll look into adding an option, but there may be too many false positives, +such as the qxl_set_mode case above. And the maintainers may object to me +eliminating the false positives by adding more CPR_IN tests, due to gratuitous +(from their POV) ugliness. + +But I will use the technique to look for more write violations. +Andrey +No way. ACPI with the source must be used in the same way as BIOSes +and optional ROMs. +Yup, its a bug. Will fix. 
+ +- Steve + +see +1741380954-341079-1-git-send-email-steven.sistare@oracle.com +/">https://lore.kernel.org/qemu-devel/ +1741380954-341079-1-git-send-email-steven.sistare@oracle.com +/ +- Steve + +On 3/6/2025 11:13 AM, Steven Sistare wrote: +On 3/6/2025 10:52 AM, Denis V. Lunev wrote: +On 3/6/25 16:16, Andrey Drobyshev wrote: +On 3/5/25 11:19 PM, Steven Sistare wrote: +On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: +On 3/4/25 9:05 PM, Steven Sistare wrote: +On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: +On 2/28/25 8:35 PM, Andrey Drobyshev wrote: +On 2/28/25 8:20 PM, Steven Sistare wrote: +On 2/28/2025 1:13 PM, Steven Sistare wrote: +On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: +Hi all, + +We've been experimenting with cpr-transfer migration mode recently +and +have discovered the following issue with the guest QXL driver: + +Run migration source: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-src.sock + +$EMULATOR -enable-kvm \ +        -machine q35 \ +        -cpu host -smp 2 -m 2G \ +        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +dev/shm/ +ram0,share=on\ +        -machine memory-backend=ram0 \ +        -machine aux-ram-share=on \ +        -drive file=$ROOTFS,media=disk,if=virtio \ +        -qmp unix:$QMPSOCK,server=on,wait=off \ +        -nographic \ +        -device qxl-vga +Run migration target: +EMULATOR=/path/to/emulator +ROOTFS=/path/to/image +QMPSOCK=/var/run/alma8qmp-dst.sock +$EMULATOR -enable-kvm \ +        -machine q35 \ +        -cpu host -smp 2 -m 2G \ +        -object memory-backend-file,id=ram0,size=2G,mem-path=/ +dev/shm/ +ram0,share=on\ +        -machine memory-backend=ram0 \ +        -machine aux-ram-share=on \ +        -drive file=$ROOTFS,media=disk,if=virtio \ +        -qmp unix:$QMPSOCK,server=on,wait=off \ +        -nographic \ +        -device qxl-vga \ +        -incoming tcp:0:44444 \ +        -incoming '{"channel-type": "cpr", "addr": { "transport": +"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' +Launch the migration: +QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell +QMPSOCK=/var/run/alma8qmp-src.sock + +$QMPSHELL -p $QMPSOCK < /dev/tty3 +            done +done + +echo "bug could not be reproduced" +exit 0 +The bug itself seems to remain unfixed, as I was able to reproduce +that +with Fedora 41 guest, as well as AlmaLinux 8 guest. However our +cpr-transfer code also seems to be buggy as it triggers the crash - +without the cpr-transfer migration the above reproduce doesn't +lead to +crash on the source VM. + +I suspect that, as cpr-transfer doesn't migrate the guest +memory, but +rather passes it through the memory backend object, our code might +somehow corrupt the VRAM.  However, I wasn't able to trace the +corruption so far. + +Could somebody help the investigation and take a look into +this?  Any +suggestions would be appreciated.  Thanks! +Possibly some memory region created by qxl is not being preserved. +Try adding these traces to see what is preserved: + +-trace enable='*cpr*' +-trace enable='*ram_alloc*' +Also try adding this patch to see if it flags any ram blocks as not +compatible with cpr.  A message is printed at migration start time. 
+    +https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- +email- +steven.sistare@oracle.com/ + +- Steve +With the traces enabled + the "migration: ram block cpr blockers" +patch +applied: + +Source: +cpr_find_fd pc.bios, id 0 returns -1 +cpr_save_fd pc.bios, id 0, fd 22 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host +0x7fec18e00000 +cpr_find_fd pc.rom, id 0 returns -1 +cpr_save_fd pc.rom, id 0, fd 23 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host +0x7fec18c00000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 +cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 24 host 0x7fec18a00000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 25 host 0x7feb77e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 27 host 0x7fec18800000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 28 host 0x7feb73c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 +cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 34 host 0x7fec18600000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 35 host 0x7fec18200000 +cpr_find_fd /rom@etc/table-loader, id 0 returns -1 +cpr_save_fd /rom@etc/table-loader, id 0, fd 36 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 +fd 36 host 0x7feb8b600000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 +cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +37 host 0x7feb8b400000 + +cpr_state_save cpr-transfer mode +cpr_transfer_output /var/run/alma8cpr-dst.sock +Target: +cpr_transfer_input /var/run/alma8cpr-dst.sock +cpr_state_load cpr-transfer mode +cpr_find_fd pc.bios, id 0 returns 20 +qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host +0x7fcdc9800000 +cpr_find_fd pc.rom, id 0 returns 19 +qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host +0x7fcdc9600000 +cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 +qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size +262144 fd 18 host 0x7fcdc9400000 +cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 +qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size +67108864 fd 17 host 0x7fcd27e00000 +cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 +fd 16 host 0x7fcdc9200000 +cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 +qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size +67108864 fd 15 host 0x7fcd23c00000 +cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 +qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 +fd 14 host 0x7fcdc8800000 +cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 +qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size +2097152 fd 13 host 0x7fcdc8400000 +cpr_find_fd /rom@etc/table-loader, id 0 returns 11 +qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 +fd 11 host 0x7fcdc8200000 +cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 +qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd +10 host 0x7fcd3be00000 +Looks like both vga.vram and qxl.vram are being preserved (with the +same +addresses), and no incompatible ram blocks are found during migration. +Sorry, addressed are not the same, of course.  However corresponding +ram +blocks do seem to be preserved and initialized. +So far, I have not reproduced the guest driver failure. + +However, I have isolated places where new QEMU improperly writes to +the qxl memory regions prior to starting the guest, by mmap'ing them +readonly after cpr: + +    qemu_ram_alloc_internal() +      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) +          ram_flags |= RAM_READONLY; +      new_block = qemu_ram_alloc_from_fd(...) + +I have attached a draft fix; try it and let me know. +My console window looks fine before and after cpr, using +-vnc $hostip:0 -vga qxl + +- Steve +Regarding the reproduce: when I launch the buggy version with the same +options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, +my VNC client silently hangs on the target after a while.  Could it +happen on your stand as well? +cpr does not preserve the vnc connection and session.  To test, I specify +port 0 for the source VM and port 1 for the dest.  When the src vnc goes +dormant the dest vnc becomes active. +Sure, I meant that VNC on the dest (on the port 1) works for a while +after the migration and then hangs, apparently after the guest QXL crash. +Could you try launching VM with +"-nographic -device qxl-vga"?  That way VM's serial console is given you +directly in the shell, so when qxl driver crashes you're still able to +inspect the kernel messages. +I have been running like that, but have not reproduced the qxl driver +crash, +and I suspect my guest image+kernel is too old. +Yes, that's probably the case.  But the crash occurs on my Fedora 41 +guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to +be buggy. +However, once I realized the +issue was post-cpr modification of qxl memory, I switched my attention +to the +fix. +As for your patch, I can report that it doesn't resolve the issue as it +is.  But I was able to track down another possible memory corruption +using your approach with readonly mmap'ing: +Program terminated with signal SIGSEGV, Segmentation fault. 
+#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); +[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] +(gdb) bt +#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 +#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, +errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 +#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, +errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 +#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, +errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 +#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, +value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 +#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, +v=0x5638996f3770, name=0x56389759b141 "realized", +opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) +      at ../qom/object.c:2374 +#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, +name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) +      at ../qom/object.c:1449 +#7  0x00005638970f8586 in object_property_set_qobject +(obj=0x5638996e0e70, name=0x56389759b141 "realized", +value=0x5638996df900, errp=0x7ffd3c2b84e0) +      at ../qom/qom-qobject.c:28 +#8  0x00005638970f3d8d in object_property_set_bool +(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, +errp=0x7ffd3c2b84e0) +      at ../qom/object.c:1519 +#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, +bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 +#10 0x0000563896dba675 in qdev_device_add_from_qdict +(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ +system/qdev-monitor.c:714 +#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, +errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 +#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, +opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ +vl.c:1207 +#13 0x000056389737a6cc in qemu_opts_foreach +      (list=0x563898427b60 , func=0x563896dc48ca +, opaque=0x0, errp=0x56389855dc40 ) +      at ../util/qemu-option.c:1135 +#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ +vl.c:2745 +#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 +) at ../system/vl.c:2806 +#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) +at ../system/vl.c:3838 +#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ +system/main.c:72 +So the attached adjusted version of your patch does seem to help.  At +least I can't reproduce the crash on my stand. +Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram +are +definitely harmful.  Try V2 of the patch, attached, which skips the lines +of init_qxl_ram that modify guest memory. +Thanks, your v2 patch does seem to prevent the crash.  Would you re-send +it to the list as a proper fix? +Yes.  Was waiting for your confirmation. +I'm wondering, could it be useful to explicitly mark all the reused +memory regions readonly upon cpr-transfer, and then make them writable +back again after the migration is done?  That way we will be segfaulting +early on instead of debugging tricky memory corruptions. +It's a useful debugging technique, but changing protection on a large +memory region +can be too expensive for production due to TLB shootdowns. 
+ +Also, there are cases where writes are performed but the value is +guaranteed to +be the same: +   qxl_post_load() +     qxl_set_mode() +       d->rom->mode = cpu_to_le32(modenr); +The value is the same because mode and shadow_rom.mode were passed in +vmstate +from old qemu. +There're also cases where devices' ROM might be re-initialized.  E.g. +this segfault occures upon further exploration of RO mapped RAM blocks: +Program terminated with signal SIGSEGV, Segmentation fault. +#0  __memmove_avx_unaligned_erms () at +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +664             rep     movsb +[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] +(gdb) bt +#0  __memmove_avx_unaligned_erms () at +../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 +#1  0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, owner=0x55aa2019ac10, +name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) +     at ../hw/core/loader.c:1032 +#2  0x000055aa1d031577 in rom_add_blob +     (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, max_len=2097152, +addr=18446744073709551615, fw_file_name=0x55aa1da51f13 "etc/acpi/tables", +fw_callback=0x55aa1d441f59 , callback_opaque=0x55aa20ff0010, as=0x0, +read_only=true) at ../hw/core/loader.c:1147 +#3  0x000055aa1cfd788d in acpi_add_rom_blob +     (update=0x55aa1d441f59 , opaque=0x55aa20ff0010, +blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at ../hw/acpi/utils.c:46 +#4  0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 +#5  0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) +at ../hw/i386/pc.c:638 +#6  0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 +, data=0x0) at ../util/notify.c:39 +#7  0x000055aa1d039ee5 in qdev_machine_creation_done () at +../hw/core/machine.c:1749 +#8  0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 +) at ../system/vl.c:2779 +#9  0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 +) at ../system/vl.c:2807 +#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at +../system/vl.c:3838 +#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at +../system/main.c:72 +I'm not sure whether ACPI tables ROM in particular is rewritten with the +same content, but there might be cases where ROM can be read from file +system upon initialization.  That is undesirable as guest kernel +certainly won't be too happy about sudden change of the device's ROM +content. + +So the issue we're dealing with here is any unwanted memory related +device initialization upon cpr. + +For now the only thing that comes to my mind is to make a test where we +put as many devices as we can into a VM, make ram blocks RO upon cpr +(and remap them as RW later after migration is done, if needed), and +catch any unwanted memory violations.  As Den suggested, we might +consider adding that behaviour as a separate non-default option (or +"migrate" command flag specific to cpr-transfer), which would only be +used in the testing. +I'll look into adding an option, but there may be too many false positives, +such as the qxl_set_mode case above.  And the maintainers may object to me +eliminating the false positives by adding more CPR_IN tests, due to gratuitous +(from their POV) ugliness. + +But I will use the technique to look for more write violations. +Andrey +No way. ACPI with the source must be used in the same way as BIOSes +and optional ROMs. +Yup, its a bug.  Will fix. 
+ +- Steve + diff --git a/classification_output/01/mistranslation/3886413 b/classification_output/01/mistranslation/3886413 deleted file mode 100644 index 5f79c452f..000000000 --- a/classification_output/01/mistranslation/3886413 +++ /dev/null @@ -1,33 +0,0 @@ -mistranslation: 0.637 -instruction: 0.555 -other: 0.535 -semantic: 0.487 - -[Qemu-devel] [BUG] vhost-user: hot-unplug vhost-user nic for windows guest OS will fail with 100% reproduce rate - -Hi, guys - -I met a problem when hot-unplug vhost-user nic for Windows 2008 rc2 sp1 64 -(Guest OS) - -The xml of nic is as followed: - - - - - - -
- - -Firstly, I use virsh attach-device win2008 vif.xml to hot-plug a nic for Guest -OS. This operation returns success. -After guest OS discover nic successfully, I use virsh detach-device win2008 -vif.xml to hot-unplug it. This operation will fail with 100% reproduce rate. - -However, if I hot-plug and hot-unplug virtio-net nic , it will not fail. - -I have analysis the process of qmp_device_del , I found that qemu have inject -interrupt to acpi to let it notice guest OS to remove nic. -I guess there is something wrong in Windows when handle the interrupt. - diff --git a/classification_output/01/mistranslation/4158985 b/classification_output/01/mistranslation/4158985 deleted file mode 100644 index 798c2e866..000000000 --- a/classification_output/01/mistranslation/4158985 +++ /dev/null @@ -1,1480 +0,0 @@ -mistranslation: 0.922 -other: 0.898 -semantic: 0.890 -instruction: 0.877 - -[BUG] vhost-vdpa: qemu-system-s390x crashes with second virtio-net-ccw device - -When I start qemu with a second virtio-net-ccw device (i.e. adding --device virtio-net-ccw in addition to the autogenerated device), I get -a segfault. gdb points to - -#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, - config=0x55d6ad9e3f80 "RT") at /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - -(backtrace doesn't go further) - -Starting qemu with no additional "-device virtio-net-ccw" (i.e., only -the autogenerated virtio-net-ccw device is present) works. Specifying -several "-device virtio-net-pci" works as well. - -Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net -client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") -works (in-between state does not compile). - -This is reproducible with tcg as well. Same problem both with ---enable-vhost-vdpa and --disable-vhost-vdpa. - -Have not yet tried to figure out what might be special with -virtio-ccw... anyone have an idea? - -[This should probably be considered a blocker?] - -On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -> -When I start qemu with a second virtio-net-ccw device (i.e. adding -> --device virtio-net-ccw in addition to the autogenerated device), I get -> -a segfault. gdb points to -> -> -#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, -> -config=0x55d6ad9e3f80 "RT") at -> -/home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> -146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -> -> -(backtrace doesn't go further) -> -> -Starting qemu with no additional "-device virtio-net-ccw" (i.e., only -> -the autogenerated virtio-net-ccw device is present) works. Specifying -> -several "-device virtio-net-pci" works as well. -> -> -Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net -> -client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") -> -works (in-between state does not compile). -Ouch. I didn't test all in-between states :( -But I wish we had a 0-day instrastructure like kernel has, -that catches things like that. - -> -This is reproducible with tcg as well. Same problem both with -> ---enable-vhost-vdpa and --disable-vhost-vdpa. -> -> -Have not yet tried to figure out what might be special with -> -virtio-ccw... anyone have an idea? -> -> -[This should probably be considered a blocker?] - -On Fri, 24 Jul 2020 09:30:58 -0400 -"Michael S. Tsirkin" wrote: - -> -On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -> -> When I start qemu with a second virtio-net-ccw device (i.e. 
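For completeness: Jason's patch is also only an attachment and is not reproduced in this thread, but the crash site quoted above (virtio-net.c:146) and his explanation ("we forget to check the existence of peer") make the shape of the fix clear: dereference nc->peer only after checking that it exists. The snippet below models that guard with stand-in structures so it compiles on its own; the real types are NetClientState/NetClientInfo in QEMU's include/net/net.h, and the enum value here is a placeholder for NET_CLIENT_DRIVER_VHOST_VDPA.

    #include <stdio.h>
    #include <stddef.h>

    /* Minimal stand-ins for the QEMU net-client objects involved in the
     * crash; only the fields needed to show the guard are modelled. */
    typedef enum { DRIVER_OTHER, DRIVER_VHOST_VDPA } driver_type;
    typedef struct { driver_type type; } client_info;
    typedef struct net_client {
        struct net_client *peer;   /* NULL when no backend is attached */
        client_info *info;
    } net_client;

    /* The segfault came from "nc->peer->info->type == ..." with peer == NULL.
     * Checking the peer first is the fix being discussed. */
    static int peer_is_vhost_vdpa(const net_client *nc)
    {
        return nc->peer && nc->peer->info->type == DRIVER_VHOST_VDPA;
    }

    int main(void)
    {
        net_client no_peer   = { .peer = NULL, .info = NULL };
        client_info vdpa     = { .type = DRIVER_VHOST_VDPA };
        net_client backend   = { .peer = NULL, .info = &vdpa };
        net_client with_peer = { .peer = &backend, .info = NULL };

        printf("no peer:   %d\n", peer_is_vhost_vdpa(&no_peer));   /* 0, no crash */
        printf("vdpa peer: %d\n", peer_is_vhost_vdpa(&with_peer)); /* 1 */
        return 0;
    }

This also matches the observation in the thread that the autogenerated virtio-net-ccw device did not crash: it already had a peer set, so the unguarded dereference never hit NULL there.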
adding -> -> -device virtio-net-ccw in addition to the autogenerated device), I get -> -> a segfault. gdb points to -> -> -> -> #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, -> -> config=0x55d6ad9e3f80 "RT") at -> -> /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> -> 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -> -> -> -> (backtrace doesn't go further) -The core was incomplete, but running under gdb directly shows that it -is just a bog-standard config space access (first for that device). - -The cause of the crash is that nc->peer is not set... no idea how that -can happen, not that familiar with that part of QEMU. (Should the code -check, or is that really something that should not happen?) - -What I don't understand is why it is set correctly for the first, -autogenerated virtio-net-ccw device, but not for the second one, and -why virtio-net-pci doesn't show these problems. The only difference -between -ccw and -pci that comes to my mind here is that config space -accesses for ccw are done via an asynchronous operation, so timing -might be different. - -> -> -> -> Starting qemu with no additional "-device virtio-net-ccw" (i.e., only -> -> the autogenerated virtio-net-ccw device is present) works. Specifying -> -> several "-device virtio-net-pci" works as well. -> -> -> -> Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net -> -> client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") -> -> works (in-between state does not compile). -> -> -Ouch. I didn't test all in-between states :( -> -But I wish we had a 0-day instrastructure like kernel has, -> -that catches things like that. -Yep, that would be useful... so patchew only builds the complete series? - -> -> -> This is reproducible with tcg as well. Same problem both with -> -> --enable-vhost-vdpa and --disable-vhost-vdpa. -> -> -> -> Have not yet tried to figure out what might be special with -> -> virtio-ccw... anyone have an idea? -> -> -> -> [This should probably be considered a blocker?] -I think so, as it makes s390x unusable with more that one -virtio-net-ccw device, and I don't even see a workaround. - -On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -> -On Fri, 24 Jul 2020 09:30:58 -0400 -> -"Michael S. Tsirkin" wrote: -> -> -> On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -> -> > When I start qemu with a second virtio-net-ccw device (i.e. adding -> -> > -device virtio-net-ccw in addition to the autogenerated device), I get -> -> > a segfault. gdb points to -> -> > -> -> > #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, -> -> > config=0x55d6ad9e3f80 "RT") at -> -> > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> -> > 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -> -> > -> -> > (backtrace doesn't go further) -> -> -The core was incomplete, but running under gdb directly shows that it -> -is just a bog-standard config space access (first for that device). -> -> -The cause of the crash is that nc->peer is not set... no idea how that -> -can happen, not that familiar with that part of QEMU. (Should the code -> -check, or is that really something that should not happen?) -> -> -What I don't understand is why it is set correctly for the first, -> -autogenerated virtio-net-ccw device, but not for the second one, and -> -why virtio-net-pci doesn't show these problems. 
The only difference -> -between -ccw and -pci that comes to my mind here is that config space -> -accesses for ccw are done via an asynchronous operation, so timing -> -might be different. -Hopefully Jason has an idea. Could you post a full command line -please? Do you need a working guest to trigger this? Does this trigger -on an x86 host? - -> -> > -> -> > Starting qemu with no additional "-device virtio-net-ccw" (i.e., only -> -> > the autogenerated virtio-net-ccw device is present) works. Specifying -> -> > several "-device virtio-net-pci" works as well. -> -> > -> -> > Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net -> -> > client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") -> -> > works (in-between state does not compile). -> -> -> -> Ouch. I didn't test all in-between states :( -> -> But I wish we had a 0-day instrastructure like kernel has, -> -> that catches things like that. -> -> -Yep, that would be useful... so patchew only builds the complete series? -> -> -> -> -> > This is reproducible with tcg as well. Same problem both with -> -> > --enable-vhost-vdpa and --disable-vhost-vdpa. -> -> > -> -> > Have not yet tried to figure out what might be special with -> -> > virtio-ccw... anyone have an idea? -> -> > -> -> > [This should probably be considered a blocker?] -> -> -I think so, as it makes s390x unusable with more that one -> -virtio-net-ccw device, and I don't even see a workaround. - -On Fri, 24 Jul 2020 11:17:57 -0400 -"Michael S. Tsirkin" wrote: - -> -On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -> -> On Fri, 24 Jul 2020 09:30:58 -0400 -> -> "Michael S. Tsirkin" wrote: -> -> -> -> > On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -> -> > > When I start qemu with a second virtio-net-ccw device (i.e. adding -> -> > > -device virtio-net-ccw in addition to the autogenerated device), I get -> -> > > a segfault. gdb points to -> -> > > -> -> > > #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, -> -> > > config=0x55d6ad9e3f80 "RT") at -> -> > > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> -> > > 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -> -> > > -> -> > > (backtrace doesn't go further) -> -> -> -> The core was incomplete, but running under gdb directly shows that it -> -> is just a bog-standard config space access (first for that device). -> -> -> -> The cause of the crash is that nc->peer is not set... no idea how that -> -> can happen, not that familiar with that part of QEMU. (Should the code -> -> check, or is that really something that should not happen?) -> -> -> -> What I don't understand is why it is set correctly for the first, -> -> autogenerated virtio-net-ccw device, but not for the second one, and -> -> why virtio-net-pci doesn't show these problems. The only difference -> -> between -ccw and -pci that comes to my mind here is that config space -> -> accesses for ccw are done via an asynchronous operation, so timing -> -> might be different. -> -> -Hopefully Jason has an idea. Could you post a full command line -> -please? Do you need a working guest to trigger this? Does this trigger -> -on an x86 host? -Yes, it does trigger with tcg-on-x86 as well. 
I've been using - -s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on --m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 --drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 --device -scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 - --device virtio-net-ccw - -It seems it needs the guest actually doing something with the nics; I -cannot reproduce the crash if I use the old advent calendar moon buggy -image and just add a virtio-net-ccw device. - -(I don't think it's a problem with my local build, as I see the problem -both on my laptop and on an LPAR.) - -> -> -> > > -> -> > > Starting qemu with no additional "-device virtio-net-ccw" (i.e., only -> -> > > the autogenerated virtio-net-ccw device is present) works. Specifying -> -> > > several "-device virtio-net-pci" works as well. -> -> > > -> -> > > Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net -> -> > > client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") -> -> > > works (in-between state does not compile). -> -> > -> -> > Ouch. I didn't test all in-between states :( -> -> > But I wish we had a 0-day instrastructure like kernel has, -> -> > that catches things like that. -> -> -> -> Yep, that would be useful... so patchew only builds the complete series? -> -> -> -> > -> -> > > This is reproducible with tcg as well. Same problem both with -> -> > > --enable-vhost-vdpa and --disable-vhost-vdpa. -> -> > > -> -> > > Have not yet tried to figure out what might be special with -> -> > > virtio-ccw... anyone have an idea? -> -> > > -> -> > > [This should probably be considered a blocker?] -> -> -> -> I think so, as it makes s390x unusable with more that one -> -> virtio-net-ccw device, and I don't even see a workaround. -> - -On 2020/7/24 下午11:34, Cornelia Huck wrote: -On Fri, 24 Jul 2020 11:17:57 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -On Fri, 24 Jul 2020 09:30:58 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -When I start qemu with a second virtio-net-ccw device (i.e. adding --device virtio-net-ccw in addition to the autogenerated device), I get -a segfault. gdb points to - -#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, - config=0x55d6ad9e3f80 "RT") at -/home/cohuck/git/qemu/hw/net/virtio-net.c:146 -146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - -(backtrace doesn't go further) -The core was incomplete, but running under gdb directly shows that it -is just a bog-standard config space access (first for that device). - -The cause of the crash is that nc->peer is not set... no idea how that -can happen, not that familiar with that part of QEMU. (Should the code -check, or is that really something that should not happen?) - -What I don't understand is why it is set correctly for the first, -autogenerated virtio-net-ccw device, but not for the second one, and -why virtio-net-pci doesn't show these problems. The only difference -between -ccw and -pci that comes to my mind here is that config space -accesses for ccw are done via an asynchronous operation, so timing -might be different. -Hopefully Jason has an idea. Could you post a full command line -please? Do you need a working guest to trigger this? Does this trigger -on an x86 host? -Yes, it does trigger with tcg-on-x86 as well. 
I've been using - -s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on --m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 --drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 --device -scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 --device virtio-net-ccw - -It seems it needs the guest actually doing something with the nics; I -cannot reproduce the crash if I use the old advent calendar moon buggy -image and just add a virtio-net-ccw device. - -(I don't think it's a problem with my local build, as I see the problem -both on my laptop and on an LPAR.) -It looks to me we forget the check the existence of peer. - -Please try the attached patch to see if it works. - -Thanks -0001-virtio-net-check-the-existence-of-peer-before-accesi.patch -Description: -Text Data - -On Sat, 25 Jul 2020 08:40:07 +0800 -Jason Wang wrote: - -> -On 2020/7/24 下午11:34, Cornelia Huck wrote: -> -> On Fri, 24 Jul 2020 11:17:57 -0400 -> -> "Michael S. Tsirkin" wrote: -> -> -> ->> On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -> ->>> On Fri, 24 Jul 2020 09:30:58 -0400 -> ->>> "Michael S. Tsirkin" wrote: -> ->>> -> ->>>> On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -> ->>>>> When I start qemu with a second virtio-net-ccw device (i.e. adding -> ->>>>> -device virtio-net-ccw in addition to the autogenerated device), I get -> ->>>>> a segfault. gdb points to -> ->>>>> -> ->>>>> #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, -> ->>>>> config=0x55d6ad9e3f80 "RT") at -> ->>>>> /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> ->>>>> 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -> ->>>>> -> ->>>>> (backtrace doesn't go further) -> ->>> The core was incomplete, but running under gdb directly shows that it -> ->>> is just a bog-standard config space access (first for that device). -> ->>> -> ->>> The cause of the crash is that nc->peer is not set... no idea how that -> ->>> can happen, not that familiar with that part of QEMU. (Should the code -> ->>> check, or is that really something that should not happen?) -> ->>> -> ->>> What I don't understand is why it is set correctly for the first, -> ->>> autogenerated virtio-net-ccw device, but not for the second one, and -> ->>> why virtio-net-pci doesn't show these problems. The only difference -> ->>> between -ccw and -pci that comes to my mind here is that config space -> ->>> accesses for ccw are done via an asynchronous operation, so timing -> ->>> might be different. -> ->> Hopefully Jason has an idea. Could you post a full command line -> ->> please? Do you need a working guest to trigger this? Does this trigger -> ->> on an x86 host? -> -> Yes, it does trigger with tcg-on-x86 as well. I've been using -> -> -> -> s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu -> -> qemu,zpci=on -> -> -m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 -> -> -drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 -> -> -device -> -> scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 -> -> -device virtio-net-ccw -> -> -> -> It seems it needs the guest actually doing something with the nics; I -> -> cannot reproduce the crash if I use the old advent calendar moon buggy -> -> image and just add a virtio-net-ccw device. 
-> -> -> -> (I don't think it's a problem with my local build, as I see the problem -> -> both on my laptop and on an LPAR.) -> -> -> -It looks to me we forget the check the existence of peer. -> -> -Please try the attached patch to see if it works. -Thanks, that patch gets my guest up and running again. So, FWIW, - -Tested-by: Cornelia Huck - -Any idea why this did not hit with virtio-net-pci (or the autogenerated -virtio-net-ccw device)? - -On 2020/7/27 下午2:43, Cornelia Huck wrote: -On Sat, 25 Jul 2020 08:40:07 +0800 -Jason Wang wrote: -On 2020/7/24 下午11:34, Cornelia Huck wrote: -On Fri, 24 Jul 2020 11:17:57 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -On Fri, 24 Jul 2020 09:30:58 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -When I start qemu with a second virtio-net-ccw device (i.e. adding --device virtio-net-ccw in addition to the autogenerated device), I get -a segfault. gdb points to - -#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, - config=0x55d6ad9e3f80 "RT") at -/home/cohuck/git/qemu/hw/net/virtio-net.c:146 -146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - -(backtrace doesn't go further) -The core was incomplete, but running under gdb directly shows that it -is just a bog-standard config space access (first for that device). - -The cause of the crash is that nc->peer is not set... no idea how that -can happen, not that familiar with that part of QEMU. (Should the code -check, or is that really something that should not happen?) - -What I don't understand is why it is set correctly for the first, -autogenerated virtio-net-ccw device, but not for the second one, and -why virtio-net-pci doesn't show these problems. The only difference -between -ccw and -pci that comes to my mind here is that config space -accesses for ccw are done via an asynchronous operation, so timing -might be different. -Hopefully Jason has an idea. Could you post a full command line -please? Do you need a working guest to trigger this? Does this trigger -on an x86 host? -Yes, it does trigger with tcg-on-x86 as well. I've been using - -s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on --m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 --drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 --device -scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 --device virtio-net-ccw - -It seems it needs the guest actually doing something with the nics; I -cannot reproduce the crash if I use the old advent calendar moon buggy -image and just add a virtio-net-ccw device. - -(I don't think it's a problem with my local build, as I see the problem -both on my laptop and on an LPAR.) -It looks to me we forget the check the existence of peer. - -Please try the attached patch to see if it works. -Thanks, that patch gets my guest up and running again. So, FWIW, - -Tested-by: Cornelia Huck - -Any idea why this did not hit with virtio-net-pci (or the autogenerated -virtio-net-ccw device)? -It can be hit with virtio-net-pci as well (just start without peer). -For autogenerated virtio-net-cww, I think the reason is that it has -already had a peer set. 
-Thanks - -On Mon, 27 Jul 2020 15:38:12 +0800 -Jason Wang wrote: - -> -On 2020/7/27 下午2:43, Cornelia Huck wrote: -> -> On Sat, 25 Jul 2020 08:40:07 +0800 -> -> Jason Wang wrote: -> -> -> ->> On 2020/7/24 下午11:34, Cornelia Huck wrote: -> ->>> On Fri, 24 Jul 2020 11:17:57 -0400 -> ->>> "Michael S. Tsirkin" wrote: -> ->>> -> ->>>> On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -> ->>>>> On Fri, 24 Jul 2020 09:30:58 -0400 -> ->>>>> "Michael S. Tsirkin" wrote: -> ->>>>> -> ->>>>>> On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -> ->>>>>>> When I start qemu with a second virtio-net-ccw device (i.e. adding -> ->>>>>>> -device virtio-net-ccw in addition to the autogenerated device), I get -> ->>>>>>> a segfault. gdb points to -> ->>>>>>> -> ->>>>>>> #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, -> ->>>>>>> config=0x55d6ad9e3f80 "RT") at -> ->>>>>>> /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> ->>>>>>> 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -> ->>>>>>> -> ->>>>>>> (backtrace doesn't go further) -> ->>>>> The core was incomplete, but running under gdb directly shows that it -> ->>>>> is just a bog-standard config space access (first for that device). -> ->>>>> -> ->>>>> The cause of the crash is that nc->peer is not set... no idea how that -> ->>>>> can happen, not that familiar with that part of QEMU. (Should the code -> ->>>>> check, or is that really something that should not happen?) -> ->>>>> -> ->>>>> What I don't understand is why it is set correctly for the first, -> ->>>>> autogenerated virtio-net-ccw device, but not for the second one, and -> ->>>>> why virtio-net-pci doesn't show these problems. The only difference -> ->>>>> between -ccw and -pci that comes to my mind here is that config space -> ->>>>> accesses for ccw are done via an asynchronous operation, so timing -> ->>>>> might be different. -> ->>>> Hopefully Jason has an idea. Could you post a full command line -> ->>>> please? Do you need a working guest to trigger this? Does this trigger -> ->>>> on an x86 host? -> ->>> Yes, it does trigger with tcg-on-x86 as well. I've been using -> ->>> -> ->>> s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu -> ->>> qemu,zpci=on -> ->>> -m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 -> ->>> -drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 -> ->>> -device -> ->>> scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 -> ->>> -device virtio-net-ccw -> ->>> -> ->>> It seems it needs the guest actually doing something with the nics; I -> ->>> cannot reproduce the crash if I use the old advent calendar moon buggy -> ->>> image and just add a virtio-net-ccw device. -> ->>> -> ->>> (I don't think it's a problem with my local build, as I see the problem -> ->>> both on my laptop and on an LPAR.) -> ->> -> ->> It looks to me we forget the check the existence of peer. -> ->> -> ->> Please try the attached patch to see if it works. -> -> Thanks, that patch gets my guest up and running again. So, FWIW, -> -> -> -> Tested-by: Cornelia Huck -> -> -> -> Any idea why this did not hit with virtio-net-pci (or the autogenerated -> -> virtio-net-ccw device)? -> -> -> -It can be hit with virtio-net-pci as well (just start without peer). -Hm, I had not been able to reproduce the crash with a 'naked' -device -virtio-net-pci. But checking seems to be the right idea anyway. 
- -> -> -For autogenerated virtio-net-cww, I think the reason is that it has -> -already had a peer set. -Ok, that might well be. - -On 2020/7/27 下午4:41, Cornelia Huck wrote: -On Mon, 27 Jul 2020 15:38:12 +0800 -Jason Wang wrote: -On 2020/7/27 下午2:43, Cornelia Huck wrote: -On Sat, 25 Jul 2020 08:40:07 +0800 -Jason Wang wrote: -On 2020/7/24 下午11:34, Cornelia Huck wrote: -On Fri, 24 Jul 2020 11:17:57 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -On Fri, 24 Jul 2020 09:30:58 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -When I start qemu with a second virtio-net-ccw device (i.e. adding --device virtio-net-ccw in addition to the autogenerated device), I get -a segfault. gdb points to - -#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, - config=0x55d6ad9e3f80 "RT") at -/home/cohuck/git/qemu/hw/net/virtio-net.c:146 -146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - -(backtrace doesn't go further) -The core was incomplete, but running under gdb directly shows that it -is just a bog-standard config space access (first for that device). - -The cause of the crash is that nc->peer is not set... no idea how that -can happen, not that familiar with that part of QEMU. (Should the code -check, or is that really something that should not happen?) - -What I don't understand is why it is set correctly for the first, -autogenerated virtio-net-ccw device, but not for the second one, and -why virtio-net-pci doesn't show these problems. The only difference -between -ccw and -pci that comes to my mind here is that config space -accesses for ccw are done via an asynchronous operation, so timing -might be different. -Hopefully Jason has an idea. Could you post a full command line -please? Do you need a working guest to trigger this? Does this trigger -on an x86 host? -Yes, it does trigger with tcg-on-x86 as well. I've been using - -s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on --m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 --drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 --device -scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 --device virtio-net-ccw - -It seems it needs the guest actually doing something with the nics; I -cannot reproduce the crash if I use the old advent calendar moon buggy -image and just add a virtio-net-ccw device. - -(I don't think it's a problem with my local build, as I see the problem -both on my laptop and on an LPAR.) -It looks to me we forget the check the existence of peer. - -Please try the attached patch to see if it works. -Thanks, that patch gets my guest up and running again. So, FWIW, - -Tested-by: Cornelia Huck - -Any idea why this did not hit with virtio-net-pci (or the autogenerated -virtio-net-ccw device)? -It can be hit with virtio-net-pci as well (just start without peer). -Hm, I had not been able to reproduce the crash with a 'naked' -device -virtio-net-pci. But checking seems to be the right idea anyway. -Sorry for being unclear, I meant for networking part, you just need -start without peer, and you need a real guest (any Linux) that is trying -to access the config space of virtio-net. -Thanks -For autogenerated virtio-net-cww, I think the reason is that it has -already had a peer set. -Ok, that might well be. 
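The patch attached to the thread ("virtio-net: check the existence of peer before accessing ...") is not reproduced in this archive, but the guard being discussed amounts to checking nc->peer before dereferencing it in virtio_net_get_config(). A minimal sketch against the line quoted in the backtrace (fragment only; this is not the actual patch, which may structure the check differently):

    /* hw/net/virtio-net.c:146 (as quoted above) dereferences nc->peer
     * unconditionally.  A bare "-device virtio-net-ccw" (or -pci) has no
     * netdev backend, so nc->peer can be NULL; the fix under discussion is
     * to skip the vDPA branch in that case, roughly: */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /* vDPA-specific config handling, unchanged */
    }
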
- -On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: -> -> -On 2020/7/27 下午4:41, Cornelia Huck wrote: -> -> On Mon, 27 Jul 2020 15:38:12 +0800 -> -> Jason Wang wrote: -> -> -> -> > On 2020/7/27 下午2:43, Cornelia Huck wrote: -> -> > > On Sat, 25 Jul 2020 08:40:07 +0800 -> -> > > Jason Wang wrote: -> -> > > > On 2020/7/24 下午11:34, Cornelia Huck wrote: -> -> > > > > On Fri, 24 Jul 2020 11:17:57 -0400 -> -> > > > > "Michael S. Tsirkin" wrote: -> -> > > > > > On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -> -> > > > > > > On Fri, 24 Jul 2020 09:30:58 -0400 -> -> > > > > > > "Michael S. Tsirkin" wrote: -> -> > > > > > > > On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -> -> > > > > > > > > When I start qemu with a second virtio-net-ccw device (i.e. -> -> > > > > > > > > adding -> -> > > > > > > > > -device virtio-net-ccw in addition to the autogenerated -> -> > > > > > > > > device), I get -> -> > > > > > > > > a segfault. gdb points to -> -> > > > > > > > > -> -> > > > > > > > > #0 0x000055d6ab52681d in virtio_net_get_config -> -> > > > > > > > > (vdev=, -> -> > > > > > > > > config=0x55d6ad9e3f80 "RT") at -> -> > > > > > > > > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> -> > > > > > > > > 146 if (nc->peer->info->type == -> -> > > > > > > > > NET_CLIENT_DRIVER_VHOST_VDPA) { -> -> > > > > > > > > -> -> > > > > > > > > (backtrace doesn't go further) -> -> > > > > > > The core was incomplete, but running under gdb directly shows -> -> > > > > > > that it -> -> > > > > > > is just a bog-standard config space access (first for that -> -> > > > > > > device). -> -> > > > > > > -> -> > > > > > > The cause of the crash is that nc->peer is not set... no idea -> -> > > > > > > how that -> -> > > > > > > can happen, not that familiar with that part of QEMU. (Should -> -> > > > > > > the code -> -> > > > > > > check, or is that really something that should not happen?) -> -> > > > > > > -> -> > > > > > > What I don't understand is why it is set correctly for the -> -> > > > > > > first, -> -> > > > > > > autogenerated virtio-net-ccw device, but not for the second -> -> > > > > > > one, and -> -> > > > > > > why virtio-net-pci doesn't show these problems. The only -> -> > > > > > > difference -> -> > > > > > > between -ccw and -pci that comes to my mind here is that config -> -> > > > > > > space -> -> > > > > > > accesses for ccw are done via an asynchronous operation, so -> -> > > > > > > timing -> -> > > > > > > might be different. -> -> > > > > > Hopefully Jason has an idea. Could you post a full command line -> -> > > > > > please? Do you need a working guest to trigger this? Does this -> -> > > > > > trigger -> -> > > > > > on an x86 host? -> -> > > > > Yes, it does trigger with tcg-on-x86 as well. I've been using -> -> > > > > -> -> > > > > s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu -> -> > > > > qemu,zpci=on -> -> > > > > -m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 -> -> > > > > -drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 -> -> > > > > -device -> -> > > > > scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 -> -> > > > > -device virtio-net-ccw -> -> > > > > -> -> > > > > It seems it needs the guest actually doing something with the nics; -> -> > > > > I -> -> > > > > cannot reproduce the crash if I use the old advent calendar moon -> -> > > > > buggy -> -> > > > > image and just add a virtio-net-ccw device. 
-> -> > > > > -> -> > > > > (I don't think it's a problem with my local build, as I see the -> -> > > > > problem -> -> > > > > both on my laptop and on an LPAR.) -> -> > > > It looks to me we forget the check the existence of peer. -> -> > > > -> -> > > > Please try the attached patch to see if it works. -> -> > > Thanks, that patch gets my guest up and running again. So, FWIW, -> -> > > -> -> > > Tested-by: Cornelia Huck -> -> > > -> -> > > Any idea why this did not hit with virtio-net-pci (or the autogenerated -> -> > > virtio-net-ccw device)? -> -> > -> -> > It can be hit with virtio-net-pci as well (just start without peer). -> -> Hm, I had not been able to reproduce the crash with a 'naked' -device -> -> virtio-net-pci. But checking seems to be the right idea anyway. -> -> -> -Sorry for being unclear, I meant for networking part, you just need start -> -without peer, and you need a real guest (any Linux) that is trying to access -> -the config space of virtio-net. -> -> -Thanks -A pxe guest will do it, but that doesn't support ccw, right? - -I'm still unclear why this triggers with ccw but not pci - -any idea? - -> -> -> -> -> > For autogenerated virtio-net-cww, I think the reason is that it has -> -> > already had a peer set. -> -> Ok, that might well be. -> -> -> -> - -On 2020/7/27 下午7:43, Michael S. Tsirkin wrote: -On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: -On 2020/7/27 下午4:41, Cornelia Huck wrote: -On Mon, 27 Jul 2020 15:38:12 +0800 -Jason Wang wrote: -On 2020/7/27 下午2:43, Cornelia Huck wrote: -On Sat, 25 Jul 2020 08:40:07 +0800 -Jason Wang wrote: -On 2020/7/24 下午11:34, Cornelia Huck wrote: -On Fri, 24 Jul 2020 11:17:57 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -On Fri, 24 Jul 2020 09:30:58 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -When I start qemu with a second virtio-net-ccw device (i.e. adding --device virtio-net-ccw in addition to the autogenerated device), I get -a segfault. gdb points to - -#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, - config=0x55d6ad9e3f80 "RT") at -/home/cohuck/git/qemu/hw/net/virtio-net.c:146 -146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - -(backtrace doesn't go further) -The core was incomplete, but running under gdb directly shows that it -is just a bog-standard config space access (first for that device). - -The cause of the crash is that nc->peer is not set... no idea how that -can happen, not that familiar with that part of QEMU. (Should the code -check, or is that really something that should not happen?) - -What I don't understand is why it is set correctly for the first, -autogenerated virtio-net-ccw device, but not for the second one, and -why virtio-net-pci doesn't show these problems. The only difference -between -ccw and -pci that comes to my mind here is that config space -accesses for ccw are done via an asynchronous operation, so timing -might be different. -Hopefully Jason has an idea. Could you post a full command line -please? Do you need a working guest to trigger this? Does this trigger -on an x86 host? -Yes, it does trigger with tcg-on-x86 as well. 
I've been using - -s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on --m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 --drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 --device -scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 --device virtio-net-ccw - -It seems it needs the guest actually doing something with the nics; I -cannot reproduce the crash if I use the old advent calendar moon buggy -image and just add a virtio-net-ccw device. - -(I don't think it's a problem with my local build, as I see the problem -both on my laptop and on an LPAR.) -It looks to me we forget the check the existence of peer. - -Please try the attached patch to see if it works. -Thanks, that patch gets my guest up and running again. So, FWIW, - -Tested-by: Cornelia Huck - -Any idea why this did not hit with virtio-net-pci (or the autogenerated -virtio-net-ccw device)? -It can be hit with virtio-net-pci as well (just start without peer). -Hm, I had not been able to reproduce the crash with a 'naked' -device -virtio-net-pci. But checking seems to be the right idea anyway. -Sorry for being unclear, I meant for networking part, you just need start -without peer, and you need a real guest (any Linux) that is trying to access -the config space of virtio-net. - -Thanks -A pxe guest will do it, but that doesn't support ccw, right? -Yes, it depends on the cli actually. -I'm still unclear why this triggers with ccw but not pci - -any idea? -I don't test pxe but I can reproduce this with pci (just start a linux -guest without a peer). -Thanks - -On Mon, Jul 27, 2020 at 08:44:09PM +0800, Jason Wang wrote: -> -> -On 2020/7/27 下午7:43, Michael S. Tsirkin wrote: -> -> On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: -> -> > On 2020/7/27 下午4:41, Cornelia Huck wrote: -> -> > > On Mon, 27 Jul 2020 15:38:12 +0800 -> -> > > Jason Wang wrote: -> -> > > -> -> > > > On 2020/7/27 下午2:43, Cornelia Huck wrote: -> -> > > > > On Sat, 25 Jul 2020 08:40:07 +0800 -> -> > > > > Jason Wang wrote: -> -> > > > > > On 2020/7/24 下午11:34, Cornelia Huck wrote: -> -> > > > > > > On Fri, 24 Jul 2020 11:17:57 -0400 -> -> > > > > > > "Michael S. Tsirkin" wrote: -> -> > > > > > > > On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -> -> > > > > > > > > On Fri, 24 Jul 2020 09:30:58 -0400 -> -> > > > > > > > > "Michael S. Tsirkin" wrote: -> -> > > > > > > > > > On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck -> -> > > > > > > > > > wrote: -> -> > > > > > > > > > > When I start qemu with a second virtio-net-ccw device -> -> > > > > > > > > > > (i.e. adding -> -> > > > > > > > > > > -device virtio-net-ccw in addition to the autogenerated -> -> > > > > > > > > > > device), I get -> -> > > > > > > > > > > a segfault. gdb points to -> -> > > > > > > > > > > -> -> > > > > > > > > > > #0 0x000055d6ab52681d in virtio_net_get_config -> -> > > > > > > > > > > (vdev=, -> -> > > > > > > > > > > config=0x55d6ad9e3f80 "RT") at -> -> > > > > > > > > > > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 -> -> > > > > > > > > > > 146 if (nc->peer->info->type == -> -> > > > > > > > > > > NET_CLIENT_DRIVER_VHOST_VDPA) { -> -> > > > > > > > > > > -> -> > > > > > > > > > > (backtrace doesn't go further) -> -> > > > > > > > > The core was incomplete, but running under gdb directly -> -> > > > > > > > > shows that it -> -> > > > > > > > > is just a bog-standard config space access (first for that -> -> > > > > > > > > device). 
-> -> > > > > > > > > -> -> > > > > > > > > The cause of the crash is that nc->peer is not set... no -> -> > > > > > > > > idea how that -> -> > > > > > > > > can happen, not that familiar with that part of QEMU. -> -> > > > > > > > > (Should the code -> -> > > > > > > > > check, or is that really something that should not happen?) -> -> > > > > > > > > -> -> > > > > > > > > What I don't understand is why it is set correctly for the -> -> > > > > > > > > first, -> -> > > > > > > > > autogenerated virtio-net-ccw device, but not for the second -> -> > > > > > > > > one, and -> -> > > > > > > > > why virtio-net-pci doesn't show these problems. The only -> -> > > > > > > > > difference -> -> > > > > > > > > between -ccw and -pci that comes to my mind here is that -> -> > > > > > > > > config space -> -> > > > > > > > > accesses for ccw are done via an asynchronous operation, so -> -> > > > > > > > > timing -> -> > > > > > > > > might be different. -> -> > > > > > > > Hopefully Jason has an idea. Could you post a full command -> -> > > > > > > > line -> -> > > > > > > > please? Do you need a working guest to trigger this? Does -> -> > > > > > > > this trigger -> -> > > > > > > > on an x86 host? -> -> > > > > > > Yes, it does trigger with tcg-on-x86 as well. I've been using -> -> > > > > > > -> -> > > > > > > s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -> -> > > > > > > -cpu qemu,zpci=on -> -> > > > > > > -m 1024 -nographic -device -> -> > > > > > > virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 -> -> > > > > > > -drive -> -> > > > > > > file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 -> -> > > > > > > -device -> -> > > > > > > scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 -> -> > > > > > > -device virtio-net-ccw -> -> > > > > > > -> -> > > > > > > It seems it needs the guest actually doing something with the -> -> > > > > > > nics; I -> -> > > > > > > cannot reproduce the crash if I use the old advent calendar -> -> > > > > > > moon buggy -> -> > > > > > > image and just add a virtio-net-ccw device. -> -> > > > > > > -> -> > > > > > > (I don't think it's a problem with my local build, as I see the -> -> > > > > > > problem -> -> > > > > > > both on my laptop and on an LPAR.) -> -> > > > > > It looks to me we forget the check the existence of peer. -> -> > > > > > -> -> > > > > > Please try the attached patch to see if it works. -> -> > > > > Thanks, that patch gets my guest up and running again. So, FWIW, -> -> > > > > -> -> > > > > Tested-by: Cornelia Huck -> -> > > > > -> -> > > > > Any idea why this did not hit with virtio-net-pci (or the -> -> > > > > autogenerated -> -> > > > > virtio-net-ccw device)? -> -> > > > It can be hit with virtio-net-pci as well (just start without peer). -> -> > > Hm, I had not been able to reproduce the crash with a 'naked' -device -> -> > > virtio-net-pci. But checking seems to be the right idea anyway. -> -> > Sorry for being unclear, I meant for networking part, you just need start -> -> > without peer, and you need a real guest (any Linux) that is trying to -> -> > access -> -> > the config space of virtio-net. -> -> > -> -> > Thanks -> -> A pxe guest will do it, but that doesn't support ccw, right? -> -> -> -Yes, it depends on the cli actually. -> -> -> -> -> -> I'm still unclear why this triggers with ccw but not pci - -> -> any idea? -> -> -> -I don't test pxe but I can reproduce this with pci (just start a linux guest -> -without a peer). 
-> -> -Thanks -> -Might be a good addition to a unit test. Not sure what would the -test do exactly: just make sure guest runs? Looks like a lot of work -for an empty test ... maybe we can poke at the guest config with -qtest commands at least. - --- -MST - -On 2020/7/27 下午9:16, Michael S. Tsirkin wrote: -On Mon, Jul 27, 2020 at 08:44:09PM +0800, Jason Wang wrote: -On 2020/7/27 下午7:43, Michael S. Tsirkin wrote: -On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: -On 2020/7/27 下午4:41, Cornelia Huck wrote: -On Mon, 27 Jul 2020 15:38:12 +0800 -Jason Wang wrote: -On 2020/7/27 下午2:43, Cornelia Huck wrote: -On Sat, 25 Jul 2020 08:40:07 +0800 -Jason Wang wrote: -On 2020/7/24 下午11:34, Cornelia Huck wrote: -On Fri, 24 Jul 2020 11:17:57 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: -On Fri, 24 Jul 2020 09:30:58 -0400 -"Michael S. Tsirkin" wrote: -On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: -When I start qemu with a second virtio-net-ccw device (i.e. adding --device virtio-net-ccw in addition to the autogenerated device), I get -a segfault. gdb points to - -#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, - config=0x55d6ad9e3f80 "RT") at -/home/cohuck/git/qemu/hw/net/virtio-net.c:146 -146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - -(backtrace doesn't go further) -The core was incomplete, but running under gdb directly shows that it -is just a bog-standard config space access (first for that device). - -The cause of the crash is that nc->peer is not set... no idea how that -can happen, not that familiar with that part of QEMU. (Should the code -check, or is that really something that should not happen?) - -What I don't understand is why it is set correctly for the first, -autogenerated virtio-net-ccw device, but not for the second one, and -why virtio-net-pci doesn't show these problems. The only difference -between -ccw and -pci that comes to my mind here is that config space -accesses for ccw are done via an asynchronous operation, so timing -might be different. -Hopefully Jason has an idea. Could you post a full command line -please? Do you need a working guest to trigger this? Does this trigger -on an x86 host? -Yes, it does trigger with tcg-on-x86 as well. I've been using - -s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on --m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 --drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 --device -scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 --device virtio-net-ccw - -It seems it needs the guest actually doing something with the nics; I -cannot reproduce the crash if I use the old advent calendar moon buggy -image and just add a virtio-net-ccw device. - -(I don't think it's a problem with my local build, as I see the problem -both on my laptop and on an LPAR.) -It looks to me we forget the check the existence of peer. - -Please try the attached patch to see if it works. -Thanks, that patch gets my guest up and running again. So, FWIW, - -Tested-by: Cornelia Huck - -Any idea why this did not hit with virtio-net-pci (or the autogenerated -virtio-net-ccw device)? -It can be hit with virtio-net-pci as well (just start without peer). -Hm, I had not been able to reproduce the crash with a 'naked' -device -virtio-net-pci. But checking seems to be the right idea anyway. 
-Sorry for being unclear, I meant for networking part, you just need start -without peer, and you need a real guest (any Linux) that is trying to access -the config space of virtio-net. - -Thanks -A pxe guest will do it, but that doesn't support ccw, right? -Yes, it depends on the cli actually. -I'm still unclear why this triggers with ccw but not pci - -any idea? -I don't test pxe but I can reproduce this with pci (just start a linux guest -without a peer). - -Thanks -Might be a good addition to a unit test. Not sure what would the -test do exactly: just make sure guest runs? Looks like a lot of work -for an empty test ... maybe we can poke at the guest config with -qtest commands at least. -That should work or we can simply extend the exist virtio-net qtest to -do that. -Thanks - diff --git a/classification_output/01/mistranslation/4412535 b/classification_output/01/mistranslation/4412535 deleted file mode 100644 index 97712c2f5..000000000 --- a/classification_output/01/mistranslation/4412535 +++ /dev/null @@ -1,348 +0,0 @@ -mistranslation: 0.800 -other: 0.786 -instruction: 0.751 -semantic: 0.737 - -[BUG] accel/tcg: cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu) - -It seems there is a bug in SIGALRM handling when 486 system emulates x86_64 -code. - -This code: - -#include -#include -#include -#include -#include - -pthread_t thread1, thread2; - -// Signal handler for SIGALRM -void alarm_handler(int sig) { - // Do nothing, just wake up the other thread -} - -// Thread 1 function -void* thread1_func(void* arg) { - // Set up the signal handler for SIGALRM - signal(SIGALRM, alarm_handler); - - // Wait for 5 seconds - sleep(1); - - // Send SIGALRM signal to thread 2 - pthread_kill(thread2, SIGALRM); - - return NULL; -} - -// Thread 2 function -void* thread2_func(void* arg) { - // Wait for the SIGALRM signal - pause(); - - printf("Thread 2 woke up!\n"); - - return NULL; -} - -int main() { - // Create thread 1 - if (pthread_create(&thread1, NULL, thread1_func, NULL) != 0) { - fprintf(stderr, "Failed to create thread 1\n"); - return 1; - } - - // Create thread 2 - if (pthread_create(&thread2, NULL, thread2_func, NULL) != 0) { - fprintf(stderr, "Failed to create thread 2\n"); - return 1; - } - - // Wait for both threads to finish - pthread_join(thread1, NULL); - pthread_join(thread2, NULL); - - return 0; -} - - -Fails with this -strace log (there are also unsupported syscalls 334 and 435, -but it seems it doesn't affect the code much): - -... -736 rt_sigaction(SIGALRM,0x000000001123ec20,0x000000001123ecc0) = 0 -736 clock_nanosleep(CLOCK_REALTIME,0,{tv_sec = 1,tv_nsec = 0},{tv_sec = -1,tv_nsec = 0}) -736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x0000000010800b38,8) = 0 -736 Unknown syscall 435 -736 -clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID| - ... 
-736 rt_sigprocmask(SIG_SETMASK,0x0000000010800b38,NULL,8) -736 set_robust_list(0x11a419a0,0) = -1 errno=38 (Function not implemented) -736 rt_sigprocmask(SIG_SETMASK,0x0000000011a41fb0,NULL,8) = 0 - = 0 -736 pause(0,0,2,277186368,0,295966400) -736 -futex(0x000000001123f990,FUTEX_CLOCK_REALTIME|FUTEX_WAIT_BITSET,738,NULL,NULL,0) - = 0 -736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x000000001123ee88,8) = 0 -736 getpid() = 736 -736 tgkill(736,739,SIGALRM) = 0 - = -1 errno=4 (Interrupted system call) ---- SIGALRM {si_signo=SIGALRM, si_code=SI_TKILL, si_pid=736, si_uid=0} --- -0x48874a != 0x3c69e10 -736 rt_sigprocmask(SIG_SETMASK,0x000000001123ee88,NULL,8) = 0 -** -ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: -(cpu == current_cpu) -Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion -failed: (cpu == current_cpu) -0x48874a != 0x3c69e10 -** -ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: -(cpu == current_cpu) -Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion -failed: (cpu == current_cpu) -# - -The code fails either with or without -singlestep, the command line: - -/usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin - -Source code of QEMU 8.1.1 was modified with patch "[PATCH] qemu/timer: Don't -use RDTSC on i486" [1], -with added few ioctls (not relevant) and cpu_exec_longjmp_cleanup() now prints -current pointers of -cpu and current_cpu (line "0x48874a != 0x3c69e10"). - -config.log (built as a part of buildroot, basically the minimal possible -configuration for running x86_64 on 486): - -# Configured with: -'/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/build/qemu-8.1.1/configure' - '--prefix=/usr' -'--cross-prefix=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/i486-buildroot-linux-gnu-' - '--audio-drv-list=' -'--python=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/python3' - -'--ninja=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/ninja' -'--disable-alsa' '--disable-bpf' '--disable-brlapi' '--disable-bsd-user' -'--disable-cap-ng' '--disable-capstone' '--disable-containers' -'--disable-coreaudio' '--disable-curl' '--disable-curses' -'--disable-dbus-display' '--disable-docs' '--disable-dsound' '--disable-hvf' -'--disable-jack' '--disable-libiscsi' '--disable-linux-aio' -'--disable-linux-io-uring' '--disable-malloc-trim' '--disable-membarrier' -'--disable-mpath' '--disable-netmap' '--disable-opengl' '--disable-oss' -'--disable-pa' '--disable-rbd' '--disable-sanitizers' '--disable-selinux' -'--disable-sparse' '--disable-strip' '--disable-vde' '--disable-vhost-crypto' -'--disable-vhost-user-blk-server' '--disable-virtfs' '--disable-whpx' -'--disable-xen' '--disable-attr' '--disable-kvm' '--disable-vhost-net' -'--disable-download' '--disable-hexagon-idef-parser' '--disable-system' -'--enable-linux-user' '--target-list=x86_64-linux-user' '--disable-vhost-user' -'--disable-slirp' '--disable-sdl' '--disable-fdt' '--enable-trace-backends=nop' -'--disable-tools' '--disable-guest-agent' '--disable-fuse' -'--disable-fuse-lseek' '--disable-seccomp' '--disable-libssh' -'--disable-libusb' '--disable-vnc' '--disable-nettle' '--disable-numa' -'--disable-pipewire' '--disable-spice' '--disable-usb-redir' -'--disable-install-blobs' - -Emulation of the same x86_64 code with qemu 6.2.0 installed on another x86_64 -native machine works fine. 
- -[1] -https://lists.nongnu.org/archive/html/qemu-devel/2023-11/msg05387.html -Best regards, -Petr - -On Sat, 25 Nov 2023 at 13:09, Petr Cvek wrote: -> -> -It seems there is a bug in SIGALRM handling when 486 system emulates x86_64 -> -code. -486 host is pretty well out of support currently. Can you reproduce -this on a less ancient host CPU type ? - -> -ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: -> -(cpu == current_cpu) -> -Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: -> -assertion failed: (cpu == current_cpu) -> -0x48874a != 0x3c69e10 -> -** -> -ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: -> -(cpu == current_cpu) -> -Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: -> -assertion failed: (cpu == current_cpu) -What compiler version do you build QEMU with? That -assert is there because we have seen some buggy compilers -in the past which don't correctly preserve the variable -value as the setjmp/longjmp spec requires them to. - -thanks --- PMM - -Dne 27. 11. 23 v 10:37 Peter Maydell napsal(a): -> -On Sat, 25 Nov 2023 at 13:09, Petr Cvek wrote: -> -> -> -> It seems there is a bug in SIGALRM handling when 486 system emulates x86_64 -> -> code. -> -> -486 host is pretty well out of support currently. Can you reproduce -> -this on a less ancient host CPU type ? -> -It seems it only fails when the code is compiled for i486. QEMU built with the -same compiler with -march=i586 and above runs on the same physical hardware -without a problem. All -march= variants were executed on ryzen 3600. - -> -> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion -> -> failed: (cpu == current_cpu) -> -> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: -> -> assertion failed: (cpu == current_cpu) -> -> 0x48874a != 0x3c69e10 -> -> ** -> -> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion -> -> failed: (cpu == current_cpu) -> -> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: -> -> assertion failed: (cpu == current_cpu) -> -> -What compiler version do you build QEMU with? That -> -assert is there because we have seen some buggy compilers -> -in the past which don't correctly preserve the variable -> -value as the setjmp/longjmp spec requires them to. -> -i486 and i586+ code variants were compiled with GCC 13.2.0 (more exactly, -slackware64 current multilib distribution). - -i486 binary which runs on the real 486 is also GCC 13.2.0 and installed as a -part of the buildroot crosscompiler (about two week old git snapshot). - -> -thanks -> --- PMM -best regards, -Petr - -On 11/25/23 07:08, Petr Cvek wrote: -ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: -(cpu == current_cpu) -Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion -failed: (cpu == current_cpu) -# - -The code fails either with or without -singlestep, the command line: - -/usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin - -Source code of QEMU 8.1.1 was modified with patch "[PATCH] qemu/timer: Don't use -RDTSC on i486" [1], -with added few ioctls (not relevant) and cpu_exec_longjmp_cleanup() now prints -current pointers of -cpu and current_cpu (line "0x48874a != 0x3c69e10"). -If you try this again with 8.2-rc2, you should not see an assertion failure. 
-You should see instead - -QEMU internal SIGILL {code=ILLOPC, addr=0x12345678} -which I think more accurately summarizes the situation of attempting RDTSC on hardware -that does not support it. -r~ - -Dne 29. 11. 23 v 15:25 Richard Henderson napsal(a): -> -On 11/25/23 07:08, Petr Cvek wrote: -> -> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion -> -> failed: (cpu == current_cpu) -> -> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: -> -> assertion failed: (cpu == current_cpu) -> -> # -> -> -> -> The code fails either with or without -singlestep, the command line: -> -> -> -> /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep -> -> /opt/x86_64/alarm.bin -> -> -> -> Source code of QEMU 8.1.1 was modified with patch "[PATCH] qemu/timer: Don't -> -> use RDTSC on i486" [1], -> -> with added few ioctls (not relevant) and cpu_exec_longjmp_cleanup() now -> -> prints current pointers of -> -> cpu and current_cpu (line "0x48874a != 0x3c69e10"). -> -> -> -If you try this again with 8.2-rc2, you should not see an assertion failure. -> -You should see instead -> -> -QEMU internal SIGILL {code=ILLOPC, addr=0x12345678} -> -> -which I think more accurately summarizes the situation of attempting RDTSC on -> -hardware that does not support it. -> -> -Compilation of vanilla qemu v8.2.0-rc2 with -march=i486 by GCC 13.2.0 and -running the resulting binary on ryzen still leads to: - -** -ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion failed: -(cpu == current_cpu) -Bail out! ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion -failed: (cpu == current_cpu) -Aborted - -> -> -r~ -Petr - diff --git a/classification_output/01/mistranslation/5373318 b/classification_output/01/mistranslation/5373318 deleted file mode 100644 index e4d4789c4..000000000 --- a/classification_output/01/mistranslation/5373318 +++ /dev/null @@ -1,692 +0,0 @@ -mistranslation: 0.881 -other: 0.839 -instruction: 0.755 -semantic: 0.752 - -[Qemu-devel] [BUG?] aio_get_linux_aio: Assertion `ctx->linux_aio' failed - -Hi, - -I am seeing some strange QEMU assertion failures for qemu on s390x, -which prevents a guest from starting. - -Git bisecting points to the following commit as the source of the error. - -commit ed6e2161715c527330f936d44af4c547f25f687e -Author: Nishanth Aravamudan -Date: Fri Jun 22 12:37:00 2018 -0700 - - linux-aio: properly bubble up errors from initialization - - laio_init() can fail for a couple of reasons, which will lead to a NULL - pointer dereference in laio_attach_aio_context(). - - To solve this, add a aio_setup_linux_aio() function which is called - early in raw_open_common. If this fails, propagate the error up. The - signature of aio_get_linux_aio() was not modified, because it seems - preferable to return the actual errno from the possible failing - initialization calls. - - Additionally, when the AioContext changes, we need to associate a - LinuxAioState with the new AioContext. Use the bdrv_attach_aio_context - callback and call the new aio_setup_linux_aio(), which will allocate a -new AioContext if needed, and return errors on failures. If it -fails for -any reason, fallback to threaded AIO with an error message, as the - device is already in-use by the guest. - - Add an assert that aio_get_linux_aio() cannot return NULL. - - Signed-off-by: Nishanth Aravamudan - Message-id: address@hidden - Signed-off-by: Stefan Hajnoczi -Not sure what is causing this assertion to fail. 
Here is the qemu -command line of the guest, from qemu log, which throws this error: -LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin -QEMU_AUDIO_DRV=none /usr/local/bin/qemu-system-s390x -name -guest=rt_vm1,debug-threads=on -S -object -secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-21-rt_vm1/master-key.aes --machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off -m -1024 -realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -object -iothread,id=iothread1 -uuid 0cde16cd-091d-41bd-9ac2-5243df5c9a0d --display none -no-user-config -nodefaults -chardev -socket,id=charmonitor,fd=28,server,nowait -mon -chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown --boot strict=on -drive -file=/dev/mapper/360050763998b0883980000002a000031,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native --device -virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on --netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device -virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:3a:c8:67:95:84,devno=fe.0.0000 --netdev tap,fd=32,id=hostnet1,vhost=on,vhostfd=33 -device -virtio-net-ccw,netdev=hostnet1,id=net1,mac=52:54:00:2a:e5:08,devno=fe.0.0002 --chardev pty,id=charconsole0 -device -sclpconsole,chardev=charconsole0,id=console0 -device -virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -sandbox -on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny --msg timestamp=on -2018-07-17 15:48:42.252+0000: Domain id=21 is tainted: high-privileges -2018-07-17T15:48:42.279380Z qemu-system-s390x: -chardev -pty,id=charconsole0: char device redirected to /dev/pts/3 (label -charconsole0) -qemu-system-s390x: util/async.c:339: aio_get_linux_aio: Assertion -`ctx->linux_aio' failed. -2018-07-17 15:48:43.309+0000: shutting down, reason=failed - - -Any help debugging this would be greatly appreciated. - -Thank you -Farhan - -On 17.07.2018 [13:25:53 -0400], Farhan Ali wrote: -> -Hi, -> -> -I am seeing some strange QEMU assertion failures for qemu on s390x, -> -which prevents a guest from starting. -> -> -Git bisecting points to the following commit as the source of the error. -> -> -commit ed6e2161715c527330f936d44af4c547f25f687e -> -Author: Nishanth Aravamudan -> -Date: Fri Jun 22 12:37:00 2018 -0700 -> -> -linux-aio: properly bubble up errors from initialization -> -> -laio_init() can fail for a couple of reasons, which will lead to a NULL -> -pointer dereference in laio_attach_aio_context(). -> -> -To solve this, add a aio_setup_linux_aio() function which is called -> -early in raw_open_common. If this fails, propagate the error up. The -> -signature of aio_get_linux_aio() was not modified, because it seems -> -preferable to return the actual errno from the possible failing -> -initialization calls. -> -> -Additionally, when the AioContext changes, we need to associate a -> -LinuxAioState with the new AioContext. Use the bdrv_attach_aio_context -> -callback and call the new aio_setup_linux_aio(), which will allocate a -> -new AioContext if needed, and return errors on failures. If it fails for -> -any reason, fallback to threaded AIO with an error message, as the -> -device is already in-use by the guest. -> -> -Add an assert that aio_get_linux_aio() cannot return NULL. -> -> -Signed-off-by: Nishanth Aravamudan -> -Message-id: address@hidden -> -Signed-off-by: Stefan Hajnoczi -> -> -> -Not sure what is causing this assertion to fail. 
Here is the qemu command -> -line of the guest, from qemu log, which throws this error: -> -> -> -LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin -> -QEMU_AUDIO_DRV=none /usr/local/bin/qemu-system-s390x -name -> -guest=rt_vm1,debug-threads=on -S -object -> -secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-21-rt_vm1/master-key.aes -> --machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off -m 1024 -> --realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -object -> -iothread,id=iothread1 -uuid 0cde16cd-091d-41bd-9ac2-5243df5c9a0d -display -> -none -no-user-config -nodefaults -chardev -> -socket,id=charmonitor,fd=28,server,nowait -mon -> -chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot -> -strict=on -drive -> -file=/dev/mapper/360050763998b0883980000002a000031,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native -> --device -> -virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -> --netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device -> -virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:3a:c8:67:95:84,devno=fe.0.0000 -> --netdev tap,fd=32,id=hostnet1,vhost=on,vhostfd=33 -device -> -virtio-net-ccw,netdev=hostnet1,id=net1,mac=52:54:00:2a:e5:08,devno=fe.0.0002 -> --chardev pty,id=charconsole0 -device -> -sclpconsole,chardev=charconsole0,id=console0 -device -> -virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -sandbox -> -on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg -> -timestamp=on -> -> -> -> -2018-07-17 15:48:42.252+0000: Domain id=21 is tainted: high-privileges -> -2018-07-17T15:48:42.279380Z qemu-system-s390x: -chardev pty,id=charconsole0: -> -char device redirected to /dev/pts/3 (label charconsole0) -> -qemu-system-s390x: util/async.c:339: aio_get_linux_aio: Assertion -> -`ctx->linux_aio' failed. -> -2018-07-17 15:48:43.309+0000: shutting down, reason=failed -> -> -> -Any help debugging this would be greatly appreciated. -iiuc, this possibly implies AIO was not actually used previously on this -guest (it might have silently been falling back to threaded IO?). I -don't have access to s390x, but would it be possible to run qemu under -gdb and see if aio_setup_linux_aio is being called at all (I think it -might not be, but I'm not sure why), and if so, if it's for the context -in question? - -If it's not being called first, could you see what callpath is calling -aio_get_linux_aio when this assertion trips? - -Thanks! --Nish - -On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: -iiuc, this possibly implies AIO was not actually used previously on this -guest (it might have silently been falling back to threaded IO?). I -don't have access to s390x, but would it be possible to run qemu under -gdb and see if aio_setup_linux_aio is being called at all (I think it -might not be, but I'm not sure why), and if so, if it's for the context -in question? - -If it's not being called first, could you see what callpath is calling -aio_get_linux_aio when this assertion trips? - -Thanks! 
--Nish -Hi Nishant, -From the coredump of the guest this is the call trace that calls -aio_get_linux_aio: -Stack trace of thread 145158: -#0 0x000003ff94dbe274 raise (libc.so.6) -#1 0x000003ff94da39a8 abort (libc.so.6) -#2 0x000003ff94db62ce __assert_fail_base (libc.so.6) -#3 0x000003ff94db634c __assert_fail (libc.so.6) -#4 0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) -#5 0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) -#6 0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) -#7 0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) -#8 0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) -#9 0x000002aa20db3c34 aio_poll (qemu-system-s390x) -#10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) -#11 0x000003ff94f879a8 start_thread (libpthread.so.0) -#12 0x000003ff94e797ee thread_start (libc.so.6) - - -Thanks for taking a look and responding. - -Thanks -Farhan - -On 07/18/2018 09:42 AM, Farhan Ali wrote: -On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: -iiuc, this possibly implies AIO was not actually used previously on this -guest (it might have silently been falling back to threaded IO?). I -don't have access to s390x, but would it be possible to run qemu under -gdb and see if aio_setup_linux_aio is being called at all (I think it -might not be, but I'm not sure why), and if so, if it's for the context -in question? - -If it's not being called first, could you see what callpath is calling -aio_get_linux_aio when this assertion trips? - -Thanks! --Nish -Hi Nishant, -From the coredump of the guest this is the call trace that calls -aio_get_linux_aio: -Stack trace of thread 145158: -#0  0x000003ff94dbe274 raise (libc.so.6) -#1  0x000003ff94da39a8 abort (libc.so.6) -#2  0x000003ff94db62ce __assert_fail_base (libc.so.6) -#3  0x000003ff94db634c __assert_fail (libc.so.6) -#4  0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) -#5  0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) -#6  0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) -#7  0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) -#8  0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) -#9  0x000002aa20db3c34 aio_poll (qemu-system-s390x) -#10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) -#11 0x000003ff94f879a8 start_thread (libpthread.so.0) -#12 0x000003ff94e797ee thread_start (libc.so.6) - - -Thanks for taking a look and responding. - -Thanks -Farhan -Trying to debug a little further, the block device in this case is a -"host device". And looking at your commit carefully you use the -bdrv_attach_aio_context callback to setup a Linux AioContext. -For some reason the "host device" struct (BlockDriver bdrv_host_device -in block/file-posix.c) does not have a bdrv_attach_aio_context defined. -So a simple change of adding the callback to the struct solves the issue -and the guest starts fine. -diff --git a/block/file-posix.c b/block/file-posix.c -index 28824aa..b8d59fb 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { - .bdrv_refresh_limits = raw_refresh_limits, - .bdrv_io_plug = raw_aio_plug, - .bdrv_io_unplug = raw_aio_unplug, -+ .bdrv_attach_aio_context = raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_getlength = raw_getlength, -I am not too familiar with block device code in QEMU, so not sure if -this is the right fix or if there are some underlying problems. 
-Thanks -Farhan - -On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: -> -> -> -On 07/18/2018 09:42 AM, Farhan Ali wrote: -> -> -> -> -> -> On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: -> -> > iiuc, this possibly implies AIO was not actually used previously on this -> -> > guest (it might have silently been falling back to threaded IO?). I -> -> > don't have access to s390x, but would it be possible to run qemu under -> -> > gdb and see if aio_setup_linux_aio is being called at all (I think it -> -> > might not be, but I'm not sure why), and if so, if it's for the context -> -> > in question? -> -> > -> -> > If it's not being called first, could you see what callpath is calling -> -> > aio_get_linux_aio when this assertion trips? -> -> > -> -> > Thanks! -> -> > -Nish -> -> -> -> -> -> Hi Nishant, -> -> -> -> From the coredump of the guest this is the call trace that calls -> -> aio_get_linux_aio: -> -> -> -> -> -> Stack trace of thread 145158: -> -> #0  0x000003ff94dbe274 raise (libc.so.6) -> -> #1  0x000003ff94da39a8 abort (libc.so.6) -> -> #2  0x000003ff94db62ce __assert_fail_base (libc.so.6) -> -> #3  0x000003ff94db634c __assert_fail (libc.so.6) -> -> #4  0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) -> -> #5  0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) -> -> #6  0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) -> -> #7  0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) -> -> #8  0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) -> -> #9  0x000002aa20db3c34 aio_poll (qemu-system-s390x) -> -> #10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) -> -> #11 0x000003ff94f879a8 start_thread (libpthread.so.0) -> -> #12 0x000003ff94e797ee thread_start (libc.so.6) -> -> -> -> -> -> Thanks for taking a look and responding. -> -> -> -> Thanks -> -> Farhan -> -> -> -> -> -> -> -> -Trying to debug a little further, the block device in this case is a "host -> -device". And looking at your commit carefully you use the -> -bdrv_attach_aio_context callback to setup a Linux AioContext. -> -> -For some reason the "host device" struct (BlockDriver bdrv_host_device in -> -block/file-posix.c) does not have a bdrv_attach_aio_context defined. -> -So a simple change of adding the callback to the struct solves the issue and -> -the guest starts fine. -> -> -> -diff --git a/block/file-posix.c b/block/file-posix.c -> -index 28824aa..b8d59fb 100644 -> ---- a/block/file-posix.c -> -+++ b/block/file-posix.c -> -@@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { -> -.bdrv_refresh_limits = raw_refresh_limits, -> -.bdrv_io_plug = raw_aio_plug, -> -.bdrv_io_unplug = raw_aio_unplug, -> -+ .bdrv_attach_aio_context = raw_aio_attach_aio_context, -> -> -.bdrv_co_truncate = raw_co_truncate, -> -.bdrv_getlength = raw_getlength, -> -> -> -> -I am not too familiar with block device code in QEMU, so not sure if -> -this is the right fix or if there are some underlying problems. -Oh this is quite embarassing! I only added the bdrv_attach_aio_context -callback for the file-backed device. Your fix is definitely corect for -host device. Let me make sure there weren't any others missed and I will -send out a properly formatted patch. Thank you for the quick testing and -turnaround! 
- --Nish - -On 07/18/2018 08:52 PM, Nishanth Aravamudan wrote: -> -On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: -> -> -> -> -> -> On 07/18/2018 09:42 AM, Farhan Ali wrote: -> ->> -> ->> -> ->> On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: -> ->>> iiuc, this possibly implies AIO was not actually used previously on this -> ->>> guest (it might have silently been falling back to threaded IO?). I -> ->>> don't have access to s390x, but would it be possible to run qemu under -> ->>> gdb and see if aio_setup_linux_aio is being called at all (I think it -> ->>> might not be, but I'm not sure why), and if so, if it's for the context -> ->>> in question? -> ->>> -> ->>> If it's not being called first, could you see what callpath is calling -> ->>> aio_get_linux_aio when this assertion trips? -> ->>> -> ->>> Thanks! -> ->>> -Nish -> ->> -> ->> -> ->> Hi Nishant, -> ->> -> ->> From the coredump of the guest this is the call trace that calls -> ->> aio_get_linux_aio: -> ->> -> ->> -> ->> Stack trace of thread 145158: -> ->> #0  0x000003ff94dbe274 raise (libc.so.6) -> ->> #1  0x000003ff94da39a8 abort (libc.so.6) -> ->> #2  0x000003ff94db62ce __assert_fail_base (libc.so.6) -> ->> #3  0x000003ff94db634c __assert_fail (libc.so.6) -> ->> #4  0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) -> ->> #5  0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) -> ->> #6  0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) -> ->> #7  0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) -> ->> #8  0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) -> ->> #9  0x000002aa20db3c34 aio_poll (qemu-system-s390x) -> ->> #10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) -> ->> #11 0x000003ff94f879a8 start_thread (libpthread.so.0) -> ->> #12 0x000003ff94e797ee thread_start (libc.so.6) -> ->> -> ->> -> ->> Thanks for taking a look and responding. -> ->> -> ->> Thanks -> ->> Farhan -> ->> -> ->> -> ->> -> -> -> -> Trying to debug a little further, the block device in this case is a "host -> -> device". And looking at your commit carefully you use the -> -> bdrv_attach_aio_context callback to setup a Linux AioContext. -> -> -> -> For some reason the "host device" struct (BlockDriver bdrv_host_device in -> -> block/file-posix.c) does not have a bdrv_attach_aio_context defined. -> -> So a simple change of adding the callback to the struct solves the issue and -> -> the guest starts fine. -> -> -> -> -> -> diff --git a/block/file-posix.c b/block/file-posix.c -> -> index 28824aa..b8d59fb 100644 -> -> --- a/block/file-posix.c -> -> +++ b/block/file-posix.c -> -> @@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { -> -> .bdrv_refresh_limits = raw_refresh_limits, -> -> .bdrv_io_plug = raw_aio_plug, -> -> .bdrv_io_unplug = raw_aio_unplug, -> -> + .bdrv_attach_aio_context = raw_aio_attach_aio_context, -> -> -> -> .bdrv_co_truncate = raw_co_truncate, -> -> .bdrv_getlength = raw_getlength, -> -> -> -> -> -> -> -> I am not too familiar with block device code in QEMU, so not sure if -> -> this is the right fix or if there are some underlying problems. -> -> -Oh this is quite embarassing! I only added the bdrv_attach_aio_context -> -callback for the file-backed device. Your fix is definitely corect for -> -host device. Let me make sure there weren't any others missed and I will -> -send out a properly formatted patch. Thank you for the quick testing and -> -turnaround! -Farhan, can you respin your patch with proper sign-off and patch description? -Adding qemu-block. 
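To make the failure mode concrete: the assertion in the backtrace lives in aio_get_linux_aio(), and it is the per-driver .bdrv_attach_aio_context hook that is supposed to have populated ctx->linux_aio beforehand. A rough sketch of that contract, with the hook body reduced to comments (simplified; the real code in util/async.c and block/file-posix.c carries the error handling and threaded-AIO fallback described in the commit message quoted above):

    /* util/async.c -- the assertion from the backtrace */
    LinuxAioState *aio_get_linux_aio(AioContext *ctx)
    {
        assert(ctx->linux_aio);   /* fires if setup never ran for this context */
        return ctx->linux_aio;
    }

    /* block/file-posix.c -- the hook that performs the setup.  Before the
     * fix it was wired up for the file driver but missing from
     * bdrv_host_device, so an iothread's AioContext never got a
     * LinuxAioState for host devices and the first raw_aio_plug() hit the
     * assert above. */
    static void raw_aio_attach_aio_context(BlockDriverState *bs,
                                           AioContext *new_context)
    {
        /* calls aio_setup_linux_aio(new_context, ...), which allocates the
         * LinuxAioState via laio_init(); on failure, warn and fall back to
         * the thread pool */
    }
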
- -Hi Christian, - -On 19.07.2018 [08:55:20 +0200], Christian Borntraeger wrote: -> -> -> -On 07/18/2018 08:52 PM, Nishanth Aravamudan wrote: -> -> On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: -> ->> -> ->> -> ->> On 07/18/2018 09:42 AM, Farhan Ali wrote: - - -> ->> I am not too familiar with block device code in QEMU, so not sure if -> ->> this is the right fix or if there are some underlying problems. -> -> -> -> Oh this is quite embarassing! I only added the bdrv_attach_aio_context -> -> callback for the file-backed device. Your fix is definitely corect for -> -> host device. Let me make sure there weren't any others missed and I will -> -> send out a properly formatted patch. Thank you for the quick testing and -> -> turnaround! -> -> -Farhan, can you respin your patch with proper sign-off and patch description? -> -Adding qemu-block. -I sent it yesterday, sorry I didn't cc everyone from this e-mail: -http://lists.nongnu.org/archive/html/qemu-block/2018-07/msg00516.html -Thanks, -Nish - diff --git a/classification_output/01/mistranslation/5798945 b/classification_output/01/mistranslation/5798945 deleted file mode 100644 index 95c3f61d1..000000000 --- a/classification_output/01/mistranslation/5798945 +++ /dev/null @@ -1,43 +0,0 @@ -mistranslation: 0.472 -semantic: 0.387 -other: 0.345 -instruction: 0.261 - -[BUG][CPU hot-plug]CPU hot-plugs cause the qemu process to coredump - -Hello,Recently, when I was developing CPU hot-plugs under the loongarch -architecture, -I found that there was a problem with qemu cpu hot-plugs under x86 -architecture, -which caused the qemu process coredump when repeatedly inserting and -unplugging -the CPU when the TCG was accelerated. - - -The specific operation process is as follows: - -1.Use the following command to start the virtual machine - -qemu-system-x86_64 \ --machine q35  \ --cpu Broadwell-IBRS \ --smp 1,maxcpus=4,sockets=4,cores=1,threads=1 \ --m 4G \ --drive file=~/anolis-8.8.qcow2  \ --serial stdio   \ --monitor telnet:localhost:4498,server,nowait - - -2.Enter QEMU Monitor via telnet for repeated CPU insertion and unplugging - -telnet 127.0.0.1 4498 -(qemu) device_add -Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1 -(qemu) device_del cpu1 -(qemu) device_add -Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1 -3.You will notice that the QEMU process has a coredump - -# malloc(): unsorted double linked list corrupted -Aborted (core dumped) - diff --git a/classification_output/01/mistranslation/5933279 b/classification_output/01/mistranslation/5933279 deleted file mode 100644 index 719c03c74..000000000 --- a/classification_output/01/mistranslation/5933279 +++ /dev/null @@ -1,4581 +0,0 @@ -mistranslation: 0.962 -instruction: 0.930 -other: 0.930 -semantic: 0.923 - -[BUG, RFC] cpr-transfer: qxl guest driver crashes after migration - -Hi all, - -We've been experimenting with cpr-transfer migration mode recently and -have discovered the following issue with the guest QXL driver: - -Run migration source: -> -EMULATOR=/path/to/emulator -> -ROOTFS=/path/to/image -> -QMPSOCK=/var/run/alma8qmp-src.sock -> -> -$EMULATOR -enable-kvm \ -> --machine q35 \ -> --cpu host -smp 2 -m 2G \ -> --object -> -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ -> --machine memory-backend=ram0 \ -> --machine aux-ram-share=on \ -> --drive file=$ROOTFS,media=disk,if=virtio \ -> --qmp unix:$QMPSOCK,server=on,wait=off \ -> --nographic \ -> --device qxl-vga -Run migration target: -> -EMULATOR=/path/to/emulator -> 
-ROOTFS=/path/to/image -> -QMPSOCK=/var/run/alma8qmp-dst.sock -> -> -> -> -$EMULATOR -enable-kvm \ -> --machine q35 \ -> --cpu host -smp 2 -m 2G \ -> --object -> -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ -> --machine memory-backend=ram0 \ -> --machine aux-ram-share=on \ -> --drive file=$ROOTFS,media=disk,if=virtio \ -> --qmp unix:$QMPSOCK,server=on,wait=off \ -> --nographic \ -> --device qxl-vga \ -> --incoming tcp:0:44444 \ -> --incoming '{"channel-type": "cpr", "addr": { "transport": "socket", -> -"type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -> -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -> -QMPSOCK=/var/run/alma8qmp-src.sock -> -> -$QMPSHELL -p $QMPSOCK < -migrate-set-parameters mode=cpr-transfer -> -migrate -> -channels=[{"channel-type":"main","addr":{"transport":"socket","type":"inet","host":"0","port":"44444"}},{"channel-type":"cpr","addr":{"transport":"socket","type":"unix","path":"/var/run/alma8cpr-dst.sock"}}] -> -EOF -Then, after a while, QXL guest driver on target crashes spewing the -following messages: -> -[ 73.962002] [TTM] Buffer eviction failed -> -[ 73.962072] qxl 0000:00:02.0: object_init failed for (3149824, 0x00000001) -> -[ 73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to allocate -> -VRAM BO -That seems to be a known kernel QXL driver bug: -https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/ -https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ -(the latter discussion contains that reproduce script which speeds up -the crash in the guest): -> -#!/bin/bash -> -> -chvt 3 -> -> -for j in $(seq 80); do -> -echo "$(date) starting round $j" -> -if [ "$(journalctl --boot | grep "failed to allocate VRAM BO")" != "" -> -]; then -> -echo "bug was reproduced after $j tries" -> -exit 1 -> -fi -> -for i in $(seq 100); do -> -dmesg > /dev/tty3 -> -done -> -done -> -> -echo "bug could not be reproduced" -> -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM. However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into this? Any -suggestions would be appreciated. Thanks! 
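As background to the suspicion above: cpr-transfer keeps guest RAM in fd-backed shared mappings and hands the descriptors to the new process instead of copying the bytes. The standalone toy below is not QEMU code (QEMU passes the fds over a Unix socket; fork() is used here only for brevity), but it shows why any write the new process makes to such a mapping is immediately guest-visible, which is what makes accidental re-initialisation dangerous.

    /* Toy illustration of fd-backed RAM preservation; not QEMU code. */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        size_t len = 4096;
        int fd = memfd_create("toy-guest-ram", 0);
        if (fd < 0 || ftruncate(fd, len) != 0) { perror("memfd"); return 1; }

        char *old_proc = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (old_proc == MAP_FAILED) { perror("mmap"); return 1; }
        strcpy(old_proc, "ring state written before cpr");   /* "old QEMU" writes */

        if (fork() == 0) {
            /* "new QEMU": maps the same fd rather than receiving a copy */
            char *new_proc = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (new_proc == MAP_FAILED) { perror("mmap"); return 1; }
            printf("new process sees: %s\n", new_proc);
            /* any store through new_proc is visible to the guest as well */
            return 0;
        }
        wait(NULL);
        return 0;
    }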
- -Andrey - -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ - -machine q35 \ - -cpu host -smp 2 -m 2G \ - -object -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ - -machine memory-backend=ram0 \ - -machine aux-ram-share=on \ - -drive file=$ROOTFS,media=disk,if=virtio \ - -qmp unix:$QMPSOCK,server=on,wait=off \ - -nographic \ - -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ --machine q35 \ - -cpu host -smp 2 -m 2G \ - -object -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ - -machine memory-backend=ram0 \ - -machine aux-ram-share=on \ - -drive file=$ROOTFS,media=disk,if=virtio \ - -qmp unix:$QMPSOCK,server=on,wait=off \ - -nographic \ - -device qxl-vga \ - -incoming tcp:0:44444 \ - -incoming '{"channel-type": "cpr", "addr": { "transport": "socket", "type": "unix", -"path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 - done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM. However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into this? Any -suggestions would be appreciated. Thanks! -Possibly some memory region created by qxl is not being preserved. 
-Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' - -- Steve - -On 2/28/2025 1:13 PM, Steven Sistare wrote: -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ -     -machine q35 \ -     -cpu host -smp 2 -m 2G \ -     -object -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ -     -machine memory-backend=ram0 \ -     -machine aux-ram-share=on \ -     -drive file=$ROOTFS,media=disk,if=virtio \ -     -qmp unix:$QMPSOCK,server=on,wait=off \ -     -nographic \ -     -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ -     -machine q35 \ -     -cpu host -smp 2 -m 2G \ -     -object -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ram0,share=on\ -     -machine memory-backend=ram0 \ -     -machine aux-ram-share=on \ -     -drive file=$ROOTFS,media=disk,if=virtio \ -     -qmp unix:$QMPSOCK,server=on,wait=off \ -     -nographic \ -     -device qxl-vga \ -     -incoming tcp:0:44444 \ -     -incoming '{"channel-type": "cpr", "addr": { "transport": "socket", "type": "unix", -"path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 -         done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM.  However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into this?  Any -suggestions would be appreciated.  Thanks! -Possibly some memory region created by qxl is not being preserved. -Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' -Also try adding this patch to see if it flags any ram blocks as not -compatible with cpr. A message is printed at migration start time. 
-1740667681-257312-1-git-send-email-steven.sistare@oracle.com -/">https://lore.kernel.org/qemu-devel/ -1740667681-257312-1-git-send-email-steven.sistare@oracle.com -/ -- Steve - -On 2/28/25 8:20 PM, Steven Sistare wrote: -> -On 2/28/2025 1:13 PM, Steven Sistare wrote: -> -> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -> ->> Hi all, -> ->> -> ->> We've been experimenting with cpr-transfer migration mode recently and -> ->> have discovered the following issue with the guest QXL driver: -> ->> -> ->> Run migration source: -> ->>> EMULATOR=/path/to/emulator -> ->>> ROOTFS=/path/to/image -> ->>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>> -> ->>> $EMULATOR -enable-kvm \ -> ->>>      -machine q35 \ -> ->>>      -cpu host -smp 2 -m 2G \ -> ->>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -> ->>> ram0,share=on\ -> ->>>      -machine memory-backend=ram0 \ -> ->>>      -machine aux-ram-share=on \ -> ->>>      -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>      -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>      -nographic \ -> ->>>      -device qxl-vga -> ->> -> ->> Run migration target: -> ->>> EMULATOR=/path/to/emulator -> ->>> ROOTFS=/path/to/image -> ->>> QMPSOCK=/var/run/alma8qmp-dst.sock -> ->>> $EMULATOR -enable-kvm \ -> ->>>      -machine q35 \ -> ->>>      -cpu host -smp 2 -m 2G \ -> ->>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -> ->>> ram0,share=on\ -> ->>>      -machine memory-backend=ram0 \ -> ->>>      -machine aux-ram-share=on \ -> ->>>      -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>      -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>      -nographic \ -> ->>>      -device qxl-vga \ -> ->>>      -incoming tcp:0:44444 \ -> ->>>      -incoming '{"channel-type": "cpr", "addr": { "transport": -> ->>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -> ->> -> ->> -> ->> Launch the migration: -> ->>> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -> ->>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>> -> ->>> $QMPSHELL -p $QMPSOCK < ->>>      migrate-set-parameters mode=cpr-transfer -> ->>>      migrate channels=[{"channel-type":"main","addr": -> ->>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, -> ->>> {"channel-type":"cpr","addr": -> ->>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- -> ->>> dst.sock"}}] -> ->>> EOF -> ->> -> ->> Then, after a while, QXL guest driver on target crashes spewing the -> ->> following messages: -> ->>> [   73.962002] [TTM] Buffer eviction failed -> ->>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, -> ->>> 0x00000001) -> ->>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to -> ->>> allocate VRAM BO -> ->> -> ->> That seems to be a known kernel QXL driver bug: -> ->> -> ->> -https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/ -> ->> -https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ -> ->> -> ->> (the latter discussion contains that reproduce script which speeds up -> ->> the crash in the guest): -> ->>> #!/bin/bash -> ->>> -> ->>> chvt 3 -> ->>> -> ->>> for j in $(seq 80); do -> ->>>          echo "$(date) starting round $j" -> ->>>          if [ "$(journalctl --boot | grep "failed to allocate VRAM -> ->>> BO")" != "" ]; then -> ->>>                  echo "bug was reproduced after $j tries" -> ->>>                  exit 1 -> ->>>          fi -> ->>>          for i in $(seq 100); do -> ->>>                  dmesg > /dev/tty3 -> ->>>          done -> ->>> done -> ->>> -> ->>> 
echo "bug could not be reproduced" -> ->>> exit 0 -> ->> -> ->> The bug itself seems to remain unfixed, as I was able to reproduce that -> ->> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -> ->> cpr-transfer code also seems to be buggy as it triggers the crash - -> ->> without the cpr-transfer migration the above reproduce doesn't lead to -> ->> crash on the source VM. -> ->> -> ->> I suspect that, as cpr-transfer doesn't migrate the guest memory, but -> ->> rather passes it through the memory backend object, our code might -> ->> somehow corrupt the VRAM.  However, I wasn't able to trace the -> ->> corruption so far. -> ->> -> ->> Could somebody help the investigation and take a look into this?  Any -> ->> suggestions would be appreciated.  Thanks! -> -> -> -> Possibly some memory region created by qxl is not being preserved. -> -> Try adding these traces to see what is preserved: -> -> -> -> -trace enable='*cpr*' -> -> -trace enable='*ram_alloc*' -> -> -Also try adding this patch to see if it flags any ram blocks as not -> -compatible with cpr.  A message is printed at migration start time. -> - -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send-email- -> -steven.sistare@oracle.com/ -> -> -- Steve -> -With the traces enabled + the "migration: ram block cpr blockers" patch -applied: - -Source: -> -cpr_find_fd pc.bios, id 0 returns -1 -> -cpr_save_fd pc.bios, id 0, fd 22 -> -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -> -0x7fec18e00000 -> -cpr_find_fd pc.rom, id 0 returns -1 -> -cpr_save_fd pc.rom, id 0, fd 23 -> -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -> -0x7fec18c00000 -> -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -> -cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -> -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd -> -24 host 0x7fec18a00000 -> -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -> -cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -> -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 -> -fd 25 host 0x7feb77e00000 -> -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -> -cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -> -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 27 -> -host 0x7fec18800000 -> -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -> -cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -> -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 -> -fd 28 host 0x7feb73c00000 -> -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -> -cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -> -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 34 -> -host 0x7fec18600000 -> -cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -> -cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -> -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 35 -> -host 0x7fec18200000 -> -cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -> -cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -> -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 36 -> -host 0x7feb8b600000 -> -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -> -cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -> -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 37 host -> -0x7feb8b400000 -> -> -cpr_state_save cpr-transfer mode -> -cpr_transfer_output /var/run/alma8cpr-dst.sock -Target: -> -cpr_transfer_input /var/run/alma8cpr-dst.sock -> -cpr_state_load cpr-transfer 
mode -> -cpr_find_fd pc.bios, id 0 returns 20 -> -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -> -0x7fcdc9800000 -> -cpr_find_fd pc.rom, id 0 returns 19 -> -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -> -0x7fcdc9600000 -> -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -> -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd -> -18 host 0x7fcdc9400000 -> -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -> -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 -> -fd 17 host 0x7fcd27e00000 -> -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -> -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 16 -> -host 0x7fcdc9200000 -> -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -> -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 -> -fd 15 host 0x7fcd23c00000 -> -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -> -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 14 -> -host 0x7fcdc8800000 -> -cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -> -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 13 -> -host 0x7fcdc8400000 -> -cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -> -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 11 -> -host 0x7fcdc8200000 -> -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -> -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 10 host -> -0x7fcd3be00000 -Looks like both vga.vram and qxl.vram are being preserved (with the same -addresses), and no incompatible ram blocks are found during migration. - -Andrey - -On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -> -On 2/28/25 8:20 PM, Steven Sistare wrote: -> -> On 2/28/2025 1:13 PM, Steven Sistare wrote: -> ->> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -> ->>> Hi all, -> ->>> -> ->>> We've been experimenting with cpr-transfer migration mode recently and -> ->>> have discovered the following issue with the guest QXL driver: -> ->>> -> ->>> Run migration source: -> ->>>> EMULATOR=/path/to/emulator -> ->>>> ROOTFS=/path/to/image -> ->>>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>>> -> ->>>> $EMULATOR -enable-kvm \ -> ->>>>      -machine q35 \ -> ->>>>      -cpu host -smp 2 -m 2G \ -> ->>>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -> ->>>> ram0,share=on\ -> ->>>>      -machine memory-backend=ram0 \ -> ->>>>      -machine aux-ram-share=on \ -> ->>>>      -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>>      -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>>      -nographic \ -> ->>>>      -device qxl-vga -> ->>> -> ->>> Run migration target: -> ->>>> EMULATOR=/path/to/emulator -> ->>>> ROOTFS=/path/to/image -> ->>>> QMPSOCK=/var/run/alma8qmp-dst.sock -> ->>>> $EMULATOR -enable-kvm \ -> ->>>>      -machine q35 \ -> ->>>>      -cpu host -smp 2 -m 2G \ -> ->>>>      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -> ->>>> ram0,share=on\ -> ->>>>      -machine memory-backend=ram0 \ -> ->>>>      -machine aux-ram-share=on \ -> ->>>>      -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>>      -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>>      -nographic \ -> ->>>>      -device qxl-vga \ -> ->>>>      -incoming tcp:0:44444 \ -> ->>>>      -incoming '{"channel-type": "cpr", "addr": { "transport": -> ->>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -> ->>> -> ->>> -> ->>> Launch the migration: -> ->>>> 
QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -> ->>>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>>> -> ->>>> $QMPSHELL -p $QMPSOCK < ->>>>      migrate-set-parameters mode=cpr-transfer -> ->>>>      migrate channels=[{"channel-type":"main","addr": -> ->>>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, -> ->>>> {"channel-type":"cpr","addr": -> ->>>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- -> ->>>> dst.sock"}}] -> ->>>> EOF -> ->>> -> ->>> Then, after a while, QXL guest driver on target crashes spewing the -> ->>> following messages: -> ->>>> [   73.962002] [TTM] Buffer eviction failed -> ->>>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, -> ->>>> 0x00000001) -> ->>>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to -> ->>>> allocate VRAM BO -> ->>> -> ->>> That seems to be a known kernel QXL driver bug: -> ->>> -> ->>> -https://lore.kernel.org/all/20220907094423.93581-1-min_halo@163.com/T/ -> ->>> -https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ -> ->>> -> ->>> (the latter discussion contains that reproduce script which speeds up -> ->>> the crash in the guest): -> ->>>> #!/bin/bash -> ->>>> -> ->>>> chvt 3 -> ->>>> -> ->>>> for j in $(seq 80); do -> ->>>>          echo "$(date) starting round $j" -> ->>>>          if [ "$(journalctl --boot | grep "failed to allocate VRAM -> ->>>> BO")" != "" ]; then -> ->>>>                  echo "bug was reproduced after $j tries" -> ->>>>                  exit 1 -> ->>>>          fi -> ->>>>          for i in $(seq 100); do -> ->>>>                  dmesg > /dev/tty3 -> ->>>>          done -> ->>>> done -> ->>>> -> ->>>> echo "bug could not be reproduced" -> ->>>> exit 0 -> ->>> -> ->>> The bug itself seems to remain unfixed, as I was able to reproduce that -> ->>> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -> ->>> cpr-transfer code also seems to be buggy as it triggers the crash - -> ->>> without the cpr-transfer migration the above reproduce doesn't lead to -> ->>> crash on the source VM. -> ->>> -> ->>> I suspect that, as cpr-transfer doesn't migrate the guest memory, but -> ->>> rather passes it through the memory backend object, our code might -> ->>> somehow corrupt the VRAM.  However, I wasn't able to trace the -> ->>> corruption so far. -> ->>> -> ->>> Could somebody help the investigation and take a look into this?  Any -> ->>> suggestions would be appreciated.  Thanks! -> ->> -> ->> Possibly some memory region created by qxl is not being preserved. -> ->> Try adding these traces to see what is preserved: -> ->> -> ->> -trace enable='*cpr*' -> ->> -trace enable='*ram_alloc*' -> -> -> -> Also try adding this patch to see if it flags any ram blocks as not -> -> compatible with cpr.  A message is printed at migration start time. 
-> ->  -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send-email- -> -> steven.sistare@oracle.com/ -> -> -> -> - Steve -> -> -> -> -With the traces enabled + the "migration: ram block cpr blockers" patch -> -applied: -> -> -Source: -> -> cpr_find_fd pc.bios, id 0 returns -1 -> -> cpr_save_fd pc.bios, id 0, fd 22 -> -> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -> -> 0x7fec18e00000 -> -> cpr_find_fd pc.rom, id 0 returns -1 -> -> cpr_save_fd pc.rom, id 0, fd 23 -> -> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -> -> 0x7fec18c00000 -> -> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -> -> cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -> -> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd -> -> 24 host 0x7fec18a00000 -> -> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -> -> cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -> -> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 -> -> fd 25 host 0x7feb77e00000 -> -> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -> -> cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -> -> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 27 -> -> host 0x7fec18800000 -> -> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -> -> cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -> -> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 -> -> fd 28 host 0x7feb73c00000 -> -> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -> -> cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -> -> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 34 -> -> host 0x7fec18600000 -> -> cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -> -> cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -> -> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd -> -> 35 host 0x7fec18200000 -> -> cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -> -> cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -> -> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 36 -> -> host 0x7feb8b600000 -> -> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -> -> cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -> -> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 37 host -> -> 0x7feb8b400000 -> -> -> -> cpr_state_save cpr-transfer mode -> -> cpr_transfer_output /var/run/alma8cpr-dst.sock -> -> -Target: -> -> cpr_transfer_input /var/run/alma8cpr-dst.sock -> -> cpr_state_load cpr-transfer mode -> -> cpr_find_fd pc.bios, id 0 returns 20 -> -> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -> -> 0x7fcdc9800000 -> -> cpr_find_fd pc.rom, id 0 returns 19 -> -> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -> -> 0x7fcdc9600000 -> -> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -> -> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd -> -> 18 host 0x7fcdc9400000 -> -> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -> -> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 -> -> fd 17 host 0x7fcd27e00000 -> -> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -> -> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 16 -> -> host 0x7fcdc9200000 -> -> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -> -> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 -> -> fd 15 host 0x7fcd23c00000 -> -> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 
14 -> -> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 14 -> -> host 0x7fcdc8800000 -> -> cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -> -> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd -> -> 13 host 0x7fcdc8400000 -> -> cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -> -> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 11 -> -> host 0x7fcdc8200000 -> -> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -> -> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 10 host -> -> 0x7fcd3be00000 -> -> -Looks like both vga.vram and qxl.vram are being preserved (with the same -> -addresses), and no incompatible ram blocks are found during migration. -> -Sorry, addressed are not the same, of course. However corresponding ram -blocks do seem to be preserved and initialized. - -On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -On 2/28/25 8:20 PM, Steven Sistare wrote: -On 2/28/2025 1:13 PM, Steven Sistare wrote: -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ -      -machine q35 \ -      -cpu host -smp 2 -m 2G \ -      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -ram0,share=on\ -      -machine memory-backend=ram0 \ -      -machine aux-ram-share=on \ -      -drive file=$ROOTFS,media=disk,if=virtio \ -      -qmp unix:$QMPSOCK,server=on,wait=off \ -      -nographic \ -      -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ -      -machine q35 \ -      -cpu host -smp 2 -m 2G \ -      -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -ram0,share=on\ -      -machine memory-backend=ram0 \ -      -machine aux-ram-share=on \ -      -drive file=$ROOTFS,media=disk,if=virtio \ -      -qmp unix:$QMPSOCK,server=on,wait=off \ -      -nographic \ -      -device qxl-vga \ -      -incoming tcp:0:44444 \ -      -incoming '{"channel-type": "cpr", "addr": { "transport": -"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 -          done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM.  However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into this?  Any -suggestions would be appreciated.  Thanks! -Possibly some memory region created by qxl is not being preserved. -Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' -Also try adding this patch to see if it flags any ram blocks as not -compatible with cpr.  
A message is printed at migration start time. -  -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send-email- -steven.sistare@oracle.com/ - -- Steve -With the traces enabled + the "migration: ram block cpr blockers" patch -applied: - -Source: -cpr_find_fd pc.bios, id 0 returns -1 -cpr_save_fd pc.bios, id 0, fd 22 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -0x7fec18e00000 -cpr_find_fd pc.rom, id 0 returns -1 -cpr_save_fd pc.rom, id 0, fd 23 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -0x7fec18c00000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd 24 -host 0x7fec18a00000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 fd -25 host 0x7feb77e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 27 host -0x7fec18800000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 fd -28 host 0x7feb73c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 34 host -0x7fec18600000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 35 -host 0x7fec18200000 -cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 36 host -0x7feb8b600000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 37 host -0x7feb8b400000 - -cpr_state_save cpr-transfer mode -cpr_transfer_output /var/run/alma8cpr-dst.sock -Target: -cpr_transfer_input /var/run/alma8cpr-dst.sock -cpr_state_load cpr-transfer mode -cpr_find_fd pc.bios, id 0 returns 20 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -0x7fcdc9800000 -cpr_find_fd pc.rom, id 0 returns 19 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -0x7fcdc9600000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size 262144 fd 18 -host 0x7fcdc9400000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size 67108864 fd -17 host 0x7fcd27e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 fd 16 host -0x7fcdc9200000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size 67108864 fd -15 host 0x7fcd23c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 fd 14 host -0x7fcdc8800000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size 2097152 fd 13 -host 0x7fcdc8400000 -cpr_find_fd /rom@etc/table-loader, id 0 returns 11 
-qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 fd 11 host -0x7fcdc8200000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd 10 host -0x7fcd3be00000 -Looks like both vga.vram and qxl.vram are being preserved (with the same -addresses), and no incompatible ram blocks are found during migration. -Sorry, addressed are not the same, of course. However corresponding ram -blocks do seem to be preserved and initialized. -So far, I have not reproduced the guest driver failure. - -However, I have isolated places where new QEMU improperly writes to -the qxl memory regions prior to starting the guest, by mmap'ing them -readonly after cpr: - - qemu_ram_alloc_internal() - if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) - ram_flags |= RAM_READONLY; - new_block = qemu_ram_alloc_from_fd(...) - -I have attached a draft fix; try it and let me know. -My console window looks fine before and after cpr, using --vnc $hostip:0 -vga qxl - -- Steve -0001-hw-qxl-cpr-support-preliminary.patch -Description: -Text document - -On 3/4/25 9:05 PM, Steven Sistare wrote: -> -On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -> -> On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -> ->> On 2/28/25 8:20 PM, Steven Sistare wrote: -> ->>> On 2/28/2025 1:13 PM, Steven Sistare wrote: -> ->>>> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -> ->>>>> Hi all, -> ->>>>> -> ->>>>> We've been experimenting with cpr-transfer migration mode recently -> ->>>>> and -> ->>>>> have discovered the following issue with the guest QXL driver: -> ->>>>> -> ->>>>> Run migration source: -> ->>>>>> EMULATOR=/path/to/emulator -> ->>>>>> ROOTFS=/path/to/image -> ->>>>>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>>>>> -> ->>>>>> $EMULATOR -enable-kvm \ -> ->>>>>>       -machine q35 \ -> ->>>>>>       -cpu host -smp 2 -m 2G \ -> ->>>>>>       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -> ->>>>>> ram0,share=on\ -> ->>>>>>       -machine memory-backend=ram0 \ -> ->>>>>>       -machine aux-ram-share=on \ -> ->>>>>>       -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>>>>       -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>>>>       -nographic \ -> ->>>>>>       -device qxl-vga -> ->>>>> -> ->>>>> Run migration target: -> ->>>>>> EMULATOR=/path/to/emulator -> ->>>>>> ROOTFS=/path/to/image -> ->>>>>> QMPSOCK=/var/run/alma8qmp-dst.sock -> ->>>>>> $EMULATOR -enable-kvm \ -> ->>>>>>       -machine q35 \ -> ->>>>>>       -cpu host -smp 2 -m 2G \ -> ->>>>>>       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -> ->>>>>> ram0,share=on\ -> ->>>>>>       -machine memory-backend=ram0 \ -> ->>>>>>       -machine aux-ram-share=on \ -> ->>>>>>       -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>>>>       -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>>>>       -nographic \ -> ->>>>>>       -device qxl-vga \ -> ->>>>>>       -incoming tcp:0:44444 \ -> ->>>>>>       -incoming '{"channel-type": "cpr", "addr": { "transport": -> ->>>>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -> ->>>>> -> ->>>>> -> ->>>>> Launch the migration: -> ->>>>>> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -> ->>>>>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>>>>> -> ->>>>>> $QMPSHELL -p $QMPSOCK < ->>>>>>       migrate-set-parameters mode=cpr-transfer -> ->>>>>>       migrate channels=[{"channel-type":"main","addr": -> ->>>>>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, -> ->>>>>> {"channel-type":"cpr","addr": -> 
->>>>>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- -> ->>>>>> dst.sock"}}] -> ->>>>>> EOF -> ->>>>> -> ->>>>> Then, after a while, QXL guest driver on target crashes spewing the -> ->>>>> following messages: -> ->>>>>> [   73.962002] [TTM] Buffer eviction failed -> ->>>>>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, -> ->>>>>> 0x00000001) -> ->>>>>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to -> ->>>>>> allocate VRAM BO -> ->>>>> -> ->>>>> That seems to be a known kernel QXL driver bug: -> ->>>>> -> ->>>>> -https://lore.kernel.org/all/20220907094423.93581-1- -> ->>>>> min_halo@163.com/T/ -> ->>>>> -https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ -> ->>>>> -> ->>>>> (the latter discussion contains that reproduce script which speeds up -> ->>>>> the crash in the guest): -> ->>>>>> #!/bin/bash -> ->>>>>> -> ->>>>>> chvt 3 -> ->>>>>> -> ->>>>>> for j in $(seq 80); do -> ->>>>>>           echo "$(date) starting round $j" -> ->>>>>>           if [ "$(journalctl --boot | grep "failed to allocate VRAM -> ->>>>>> BO")" != "" ]; then -> ->>>>>>                   echo "bug was reproduced after $j tries" -> ->>>>>>                   exit 1 -> ->>>>>>           fi -> ->>>>>>           for i in $(seq 100); do -> ->>>>>>                   dmesg > /dev/tty3 -> ->>>>>>           done -> ->>>>>> done -> ->>>>>> -> ->>>>>> echo "bug could not be reproduced" -> ->>>>>> exit 0 -> ->>>>> -> ->>>>> The bug itself seems to remain unfixed, as I was able to reproduce -> ->>>>> that -> ->>>>> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -> ->>>>> cpr-transfer code also seems to be buggy as it triggers the crash - -> ->>>>> without the cpr-transfer migration the above reproduce doesn't -> ->>>>> lead to -> ->>>>> crash on the source VM. -> ->>>>> -> ->>>>> I suspect that, as cpr-transfer doesn't migrate the guest memory, but -> ->>>>> rather passes it through the memory backend object, our code might -> ->>>>> somehow corrupt the VRAM.  However, I wasn't able to trace the -> ->>>>> corruption so far. -> ->>>>> -> ->>>>> Could somebody help the investigation and take a look into this?  Any -> ->>>>> suggestions would be appreciated.  Thanks! -> ->>>> -> ->>>> Possibly some memory region created by qxl is not being preserved. -> ->>>> Try adding these traces to see what is preserved: -> ->>>> -> ->>>> -trace enable='*cpr*' -> ->>>> -trace enable='*ram_alloc*' -> ->>> -> ->>> Also try adding this patch to see if it flags any ram blocks as not -> ->>> compatible with cpr.  A message is printed at migration start time. 
-> ->>>   -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- -> ->>> email- -> ->>> steven.sistare@oracle.com/ -> ->>> -> ->>> - Steve -> ->>> -> ->> -> ->> With the traces enabled + the "migration: ram block cpr blockers" patch -> ->> applied: -> ->> -> ->> Source: -> ->>> cpr_find_fd pc.bios, id 0 returns -1 -> ->>> cpr_save_fd pc.bios, id 0, fd 22 -> ->>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -> ->>> 0x7fec18e00000 -> ->>> cpr_find_fd pc.rom, id 0 returns -1 -> ->>> cpr_save_fd pc.rom, id 0, fd 23 -> ->>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -> ->>> 0x7fec18c00000 -> ->>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -> ->>> cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -> ->>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -> ->>> 262144 fd 24 host 0x7fec18a00000 -> ->>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -> ->>> cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -> ->>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -> ->>> 67108864 fd 25 host 0x7feb77e00000 -> ->>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -> ->>> cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -> ->>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -> ->>> fd 27 host 0x7fec18800000 -> ->>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -> ->>> cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -> ->>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -> ->>> 67108864 fd 28 host 0x7feb73c00000 -> ->>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -> ->>> cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -> ->>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -> ->>> fd 34 host 0x7fec18600000 -> ->>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -> ->>> cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -> ->>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -> ->>> 2097152 fd 35 host 0x7fec18200000 -> ->>> cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -> ->>> cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -> ->>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -> ->>> fd 36 host 0x7feb8b600000 -> ->>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -> ->>> cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -> ->>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -> ->>> 37 host 0x7feb8b400000 -> ->>> -> ->>> cpr_state_save cpr-transfer mode -> ->>> cpr_transfer_output /var/run/alma8cpr-dst.sock -> ->> -> ->> Target: -> ->>> cpr_transfer_input /var/run/alma8cpr-dst.sock -> ->>> cpr_state_load cpr-transfer mode -> ->>> cpr_find_fd pc.bios, id 0 returns 20 -> ->>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -> ->>> 0x7fcdc9800000 -> ->>> cpr_find_fd pc.rom, id 0 returns 19 -> ->>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -> ->>> 0x7fcdc9600000 -> ->>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -> ->>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -> ->>> 262144 fd 18 host 0x7fcdc9400000 -> ->>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -> ->>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -> ->>> 67108864 fd 17 host 0x7fcd27e00000 -> ->>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -> ->>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -> ->>> fd 16 host 0x7fcdc9200000 -> ->>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 
-> ->>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -> ->>> 67108864 fd 15 host 0x7fcd23c00000 -> ->>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -> ->>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -> ->>> fd 14 host 0x7fcdc8800000 -> ->>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -> ->>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -> ->>> 2097152 fd 13 host 0x7fcdc8400000 -> ->>> cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -> ->>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -> ->>> fd 11 host 0x7fcdc8200000 -> ->>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -> ->>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -> ->>> 10 host 0x7fcd3be00000 -> ->> -> ->> Looks like both vga.vram and qxl.vram are being preserved (with the same -> ->> addresses), and no incompatible ram blocks are found during migration. -> -> -> -> Sorry, addressed are not the same, of course.  However corresponding ram -> -> blocks do seem to be preserved and initialized. -> -> -So far, I have not reproduced the guest driver failure. -> -> -However, I have isolated places where new QEMU improperly writes to -> -the qxl memory regions prior to starting the guest, by mmap'ing them -> -readonly after cpr: -> -> -  qemu_ram_alloc_internal() -> -    if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) -> -        ram_flags |= RAM_READONLY; -> -    new_block = qemu_ram_alloc_from_fd(...) -> -> -I have attached a draft fix; try it and let me know. -> -My console window looks fine before and after cpr, using -> --vnc $hostip:0 -vga qxl -> -> -- Steve -Regarding the reproduce: when I launch the buggy version with the same -options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, -my VNC client silently hangs on the target after a while. Could it -happen on your stand as well? Could you try launching VM with -"-nographic -device qxl-vga"? That way VM's serial console is given you -directly in the shell, so when qxl driver crashes you're still able to -inspect the kernel messages. - -As for your patch, I can report that it doesn't resolve the issue as it -is. But I was able to track down another possible memory corruption -using your approach with readonly mmap'ing: - -> -Program terminated with signal SIGSEGV, Segmentation fault. 
-> -#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -> -412 d->ram->magic = cpu_to_le32(QXL_RAM_MAGIC); -> -[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] -> -(gdb) bt -> -#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -> -#1 0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, -> -errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 -> -#2 0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, -> -errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 -> -#3 0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, -> -errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 -> -#4 0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, -> -value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 -> -#5 0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, -> -v=0x5638996f3770, name=0x56389759b141 "realized", opaque=0x5638987893d0, -> -errp=0x7ffd3c2b84e0) -> -at ../qom/object.c:2374 -> -#6 0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, -> -name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) -> -at ../qom/object.c:1449 -> -#7 0x00005638970f8586 in object_property_set_qobject (obj=0x5638996e0e70, -> -name=0x56389759b141 "realized", value=0x5638996df900, errp=0x7ffd3c2b84e0) -> -at ../qom/qom-qobject.c:28 -> -#8 0x00005638970f3d8d in object_property_set_bool (obj=0x5638996e0e70, -> -name=0x56389759b141 "realized", value=true, errp=0x7ffd3c2b84e0) -> -at ../qom/object.c:1519 -> -#9 0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, -> -bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 -> -#10 0x0000563896dba675 in qdev_device_add_from_qdict (opts=0x5638996dfe50, -> -from_json=false, errp=0x7ffd3c2b84e0) at ../system/qdev-monitor.c:714 -> -#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, -> -errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 -> -#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, opts=0x563898786150, -> -errp=0x56389855dc40 ) at ../system/vl.c:1207 -> -#13 0x000056389737a6cc in qemu_opts_foreach -> -(list=0x563898427b60 , func=0x563896dc48ca -> -, opaque=0x0, errp=0x56389855dc40 ) -> -at ../util/qemu-option.c:1135 -> -#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/vl.c:2745 -> -#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 -> -) at ../system/vl.c:2806 -> -#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) at -> -../system/vl.c:3838 -> -#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at -> -../system/main.c:72 -So the attached adjusted version of your patch does seem to help. At -least I can't reproduce the crash on my stand. - -I'm wondering, could it be useful to explicitly mark all the reused -memory regions readonly upon cpr-transfer, and then make them writable -back again after the migration is done? That way we will be segfaulting -early on instead of debugging tricky memory corruptions. 
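A minimal standalone sketch of the read-only-mapping trick under discussion, assuming nothing more than POSIX mprotect(); the helper name is invented and this is not how QEMU actually manages RAM blocks. The point is simply that an armed trap turns a stray post-cpr store into an immediate SIGSEGV with a backtrace at the culprit, and is disarmed again before the guest runs.

    #include <stdio.h>
    #include <sys/mman.h>

    /* Invented helper: flip a page-aligned region between RO and RW. */
    static int set_region_writable(void *host, size_t len, int writable)
    {
        int prot = writable ? PROT_READ | PROT_WRITE : PROT_READ;
        if (mprotect(host, len, prot) != 0) {
            perror("mprotect");
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        size_t len = 4096;
        char *ram = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (ram == MAP_FAILED) { perror("mmap"); return 1; }

        set_region_writable(ram, len, 0);   /* arm the trap after "cpr" */
        /* ram[0] = 1;   <- a stray write here would now fault at the culprit */
        set_region_writable(ram, len, 1);   /* disarm before starting the guest */
        ram[0] = 1;                         /* legitimate writes work again */
        printf("ok: %d\n", ram[0]);
        return 0;
    }

(As noted later in the thread, toggling protection this way is fine for debugging but can be too costly for production because of TLB shootdowns on large regions.)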
- -Andrey -0001-hw-qxl-cpr-support-preliminary.patch -Description: -Text Data - -On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: -On 3/4/25 9:05 PM, Steven Sistare wrote: -On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -On 2/28/25 8:20 PM, Steven Sistare wrote: -On 2/28/2025 1:13 PM, Steven Sistare wrote: -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently -and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ -       -machine q35 \ -       -cpu host -smp 2 -m 2G \ -       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -ram0,share=on\ -       -machine memory-backend=ram0 \ -       -machine aux-ram-share=on \ -       -drive file=$ROOTFS,media=disk,if=virtio \ -       -qmp unix:$QMPSOCK,server=on,wait=off \ -       -nographic \ -       -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ -       -machine q35 \ -       -cpu host -smp 2 -m 2G \ -       -object memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -ram0,share=on\ -       -machine memory-backend=ram0 \ -       -machine aux-ram-share=on \ -       -drive file=$ROOTFS,media=disk,if=virtio \ -       -qmp unix:$QMPSOCK,server=on,wait=off \ -       -nographic \ -       -device qxl-vga \ -       -incoming tcp:0:44444 \ -       -incoming '{"channel-type": "cpr", "addr": { "transport": -"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 -           done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce -that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't -lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM.  However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into this?  Any -suggestions would be appreciated.  Thanks! -Possibly some memory region created by qxl is not being preserved. -Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' -Also try adding this patch to see if it flags any ram blocks as not -compatible with cpr.  A message is printed at migration start time. 
-   -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- -email- -steven.sistare@oracle.com/ - -- Steve -With the traces enabled + the "migration: ram block cpr blockers" patch -applied: - -Source: -cpr_find_fd pc.bios, id 0 returns -1 -cpr_save_fd pc.bios, id 0, fd 22 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -0x7fec18e00000 -cpr_find_fd pc.rom, id 0 returns -1 -cpr_save_fd pc.rom, id 0, fd 23 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -0x7fec18c00000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 24 host 0x7fec18a00000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 25 host 0x7feb77e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 27 host 0x7fec18800000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 28 host 0x7feb73c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 34 host 0x7fec18600000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 35 host 0x7fec18200000 -cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -fd 36 host 0x7feb8b600000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -37 host 0x7feb8b400000 - -cpr_state_save cpr-transfer mode -cpr_transfer_output /var/run/alma8cpr-dst.sock -Target: -cpr_transfer_input /var/run/alma8cpr-dst.sock -cpr_state_load cpr-transfer mode -cpr_find_fd pc.bios, id 0 returns 20 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -0x7fcdc9800000 -cpr_find_fd pc.rom, id 0 returns 19 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -0x7fcdc9600000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 18 host 0x7fcdc9400000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 17 host 0x7fcd27e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 16 host 0x7fcdc9200000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 15 host 0x7fcd23c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 14 host 0x7fcdc8800000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 13 host 0x7fcdc8400000 -cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 -fd 11 host 0x7fcdc8200000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -10 host 0x7fcd3be00000 -Looks like both vga.vram and qxl.vram are being preserved (with the same -addresses), and no incompatible ram blocks are found during migration. -Sorry, addressed are not the same, of course.  However corresponding ram -blocks do seem to be preserved and initialized. -So far, I have not reproduced the guest driver failure. - -However, I have isolated places where new QEMU improperly writes to -the qxl memory regions prior to starting the guest, by mmap'ing them -readonly after cpr: - -   qemu_ram_alloc_internal() -     if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) -         ram_flags |= RAM_READONLY; -     new_block = qemu_ram_alloc_from_fd(...) - -I have attached a draft fix; try it and let me know. -My console window looks fine before and after cpr, using --vnc $hostip:0 -vga qxl - -- Steve -Regarding the reproduce: when I launch the buggy version with the same -options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, -my VNC client silently hangs on the target after a while. Could it -happen on your stand as well? -cpr does not preserve the vnc connection and session. To test, I specify -port 0 for the source VM and port 1 for the dest. When the src vnc goes -dormant the dest vnc becomes active. -Could you try launching VM with -"-nographic -device qxl-vga"? That way VM's serial console is given you -directly in the shell, so when qxl driver crashes you're still able to -inspect the kernel messages. -I have been running like that, but have not reproduced the qxl driver crash, -and I suspect my guest image+kernel is too old. However, once I realized the -issue was post-cpr modification of qxl memory, I switched my attention to the -fix. -As for your patch, I can report that it doesn't resolve the issue as it -is. But I was able to track down another possible memory corruption -using your approach with readonly mmap'ing: -Program terminated with signal SIGSEGV, Segmentation fault. 
-#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -412 d->ram->magic = cpu_to_le32(QXL_RAM_MAGIC); -[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] -(gdb) bt -#0 init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -#1 0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, -errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 -#2 0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, -errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 -#3 0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, -errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 -#4 0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, value=true, -errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 -#5 0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, v=0x5638996f3770, -name=0x56389759b141 "realized", opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) - at ../qom/object.c:2374 -#6 0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, name=0x56389759b141 -"realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) - at ../qom/object.c:1449 -#7 0x00005638970f8586 in object_property_set_qobject (obj=0x5638996e0e70, -name=0x56389759b141 "realized", value=0x5638996df900, errp=0x7ffd3c2b84e0) - at ../qom/qom-qobject.c:28 -#8 0x00005638970f3d8d in object_property_set_bool (obj=0x5638996e0e70, -name=0x56389759b141 "realized", value=true, errp=0x7ffd3c2b84e0) - at ../qom/object.c:1519 -#9 0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, bus=0x563898cf3c20, -errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 -#10 0x0000563896dba675 in qdev_device_add_from_qdict (opts=0x5638996dfe50, -from_json=false, errp=0x7ffd3c2b84e0) at ../system/qdev-monitor.c:714 -#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, errp=0x56389855dc40 -) at ../system/qdev-monitor.c:733 -#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, opts=0x563898786150, -errp=0x56389855dc40 ) at ../system/vl.c:1207 -#13 0x000056389737a6cc in qemu_opts_foreach - (list=0x563898427b60 , func=0x563896dc48ca , -opaque=0x0, errp=0x56389855dc40 ) - at ../util/qemu-option.c:1135 -#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/vl.c:2745 -#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 -) at ../system/vl.c:2806 -#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) at -../system/vl.c:3838 -#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at -../system/main.c:72 -So the attached adjusted version of your patch does seem to help. At -least I can't reproduce the crash on my stand. -Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram are -definitely harmful. Try V2 of the patch, attached, which skips the lines -of init_qxl_ram that modify guest memory. -I'm wondering, could it be useful to explicitly mark all the reused -memory regions readonly upon cpr-transfer, and then make them writable -back again after the migration is done? That way we will be segfaulting -early on instead of debugging tricky memory corruptions. -It's a useful debugging technique, but changing protection on a large memory -region -can be too expensive for production due to TLB shootdowns. - -Also, there are cases where writes are performed but the value is guaranteed to -be the same: - qxl_post_load() - qxl_set_mode() - d->rom->mode = cpu_to_le32(modenr); -The value is the same because mode and shadow_rom.mode were passed in vmstate -from old qemu. 
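To make the fix direction concrete, here is a toy model of "initialise only on a fresh start"; the struct and flag are invented and do not match the real QXLRam layout or the attached patch. The inherited case leaves the magic and the rings alone, because the guest driver still holds live state in them.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct toy_ring    { uint32_t prod, cons; };
    struct toy_qxl_ram { uint32_t magic; struct toy_ring cmd_ring; };

    static void toy_init_qxl_ram(struct toy_qxl_ram *ram, bool inherited_from_cpr)
    {
        if (inherited_from_cpr) {
            /* old process and guest already agreed on this state; rewriting
             * the ring here is exactly the corruption seen in the crash */
            return;
        }
        ram->magic = 0x41525851u;            /* stand-in for QXL_RAM_MAGIC */
        ram->cmd_ring.prod = ram->cmd_ring.cons = 0;
    }

    int main(void)
    {
        struct toy_qxl_ram ram = { .magic = 0x41525851u,
                                   .cmd_ring = { .prod = 7, .cons = 5 } };
        toy_init_qxl_ram(&ram, true);        /* cpr path: state left untouched */
        printf("prod=%u cons=%u\n", ram.cmd_ring.prod, ram.cmd_ring.cons);
        return 0;
    }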
- -- Steve -0001-hw-qxl-cpr-support-preliminary-V2.patch -Description: -Text document - -On 3/5/25 22:19, Steven Sistare wrote: -On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: -On 3/4/25 9:05 PM, Steven Sistare wrote: -On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -On 2/28/25 8:20 PM, Steven Sistare wrote: -On 2/28/2025 1:13 PM, Steven Sistare wrote: -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently -and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ -       -machine q35 \ -       -cpu host -smp 2 -m 2G \ -       -object -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -ram0,share=on\ -       -machine memory-backend=ram0 \ -       -machine aux-ram-share=on \ -       -drive file=$ROOTFS,media=disk,if=virtio \ -       -qmp unix:$QMPSOCK,server=on,wait=off \ -       -nographic \ -       -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ -       -machine q35 \ -       -cpu host -smp 2 -m 2G \ -       -object -memory-backend-file,id=ram0,size=2G,mem-path=/dev/shm/ -ram0,share=on\ -       -machine memory-backend=ram0 \ -       -machine aux-ram-share=on \ -       -drive file=$ROOTFS,media=disk,if=virtio \ -       -qmp unix:$QMPSOCK,server=on,wait=off \ -       -nographic \ -       -device qxl-vga \ -       -incoming tcp:0:44444 \ -       -incoming '{"channel-type": "cpr", "addr": { "transport": -"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 -           done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce -that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the -crash - -without the cpr-transfer migration the above reproduce doesn't -lead to -crash on the source VM. -I suspect that, as cpr-transfer doesn't migrate the guest -memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM.  However, I wasn't able to trace the -corruption so far. -Could somebody help the investigation and take a look into -this?  Any -suggestions would be appreciated.  Thanks! -Possibly some memory region created by qxl is not being preserved. -Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' -Also try adding this patch to see if it flags any ram blocks as not -compatible with cpr.  A message is printed at migration start time. 
-https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- -email- -steven.sistare@oracle.com/ - -- Steve -With the traces enabled + the "migration: ram block cpr blockers" -patch -applied: - -Source: -cpr_find_fd pc.bios, id 0 returns -1 -cpr_save_fd pc.bios, id 0, fd 22 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -0x7fec18e00000 -cpr_find_fd pc.rom, id 0 returns -1 -cpr_save_fd pc.rom, id 0, fd 23 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -0x7fec18c00000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 24 host 0x7fec18a00000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 25 host 0x7feb77e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 27 host 0x7fec18800000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 28 host 0x7feb73c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 34 host 0x7fec18600000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 35 host 0x7fec18200000 -cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -fd 36 host 0x7feb8b600000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -37 host 0x7feb8b400000 - -cpr_state_save cpr-transfer mode -cpr_transfer_output /var/run/alma8cpr-dst.sock -Target: -cpr_transfer_input /var/run/alma8cpr-dst.sock -cpr_state_load cpr-transfer mode -cpr_find_fd pc.bios, id 0 returns 20 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -0x7fcdc9800000 -cpr_find_fd pc.rom, id 0 returns 19 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -0x7fcdc9600000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 18 host 0x7fcdc9400000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 17 host 0x7fcd27e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 16 host 0x7fcdc9200000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 15 host 0x7fcd23c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 14 host 0x7fcdc8800000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 13 host 0x7fcdc8400000 -cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 -fd 11 host 0x7fcdc8200000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -10 host 0x7fcd3be00000 -Looks like both vga.vram and qxl.vram are being preserved (with -the same -addresses), and no incompatible ram blocks are found during -migration. -Sorry, addressed are not the same, of course.  However -corresponding ram -blocks do seem to be preserved and initialized. -So far, I have not reproduced the guest driver failure. - -However, I have isolated places where new QEMU improperly writes to -the qxl memory regions prior to starting the guest, by mmap'ing them -readonly after cpr: - -   qemu_ram_alloc_internal() -     if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) -         ram_flags |= RAM_READONLY; -     new_block = qemu_ram_alloc_from_fd(...) - -I have attached a draft fix; try it and let me know. -My console window looks fine before and after cpr, using --vnc $hostip:0 -vga qxl - -- Steve -Regarding the reproduce: when I launch the buggy version with the same -options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, -my VNC client silently hangs on the target after a while.  Could it -happen on your stand as well? -cpr does not preserve the vnc connection and session.  To test, I specify -port 0 for the source VM and port 1 for the dest.  When the src vnc goes -dormant the dest vnc becomes active. -Could you try launching VM with -"-nographic -device qxl-vga"?  That way VM's serial console is given you -directly in the shell, so when qxl driver crashes you're still able to -inspect the kernel messages. -I have been running like that, but have not reproduced the qxl driver -crash, -and I suspect my guest image+kernel is too old.  However, once I -realized the -issue was post-cpr modification of qxl memory, I switched my attention -to the -fix. -As for your patch, I can report that it doesn't resolve the issue as it -is.  But I was able to track down another possible memory corruption -using your approach with readonly mmap'ing: -Program terminated with signal SIGSEGV, Segmentation fault. 
-#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); -[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] -(gdb) bt -#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, -errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 -#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, -errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 -#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, -errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 -#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, -value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 -#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, -v=0x5638996f3770, name=0x56389759b141 "realized", -opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) -     at ../qom/object.c:2374 -#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, -name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) -     at ../qom/object.c:1449 -#7  0x00005638970f8586 in object_property_set_qobject -(obj=0x5638996e0e70, name=0x56389759b141 "realized", -value=0x5638996df900, errp=0x7ffd3c2b84e0) -     at ../qom/qom-qobject.c:28 -#8  0x00005638970f3d8d in object_property_set_bool -(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, -errp=0x7ffd3c2b84e0) -     at ../qom/object.c:1519 -#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, -bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 -#10 0x0000563896dba675 in qdev_device_add_from_qdict -(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at -../system/qdev-monitor.c:714 -#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, -errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 -#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, -opts=0x563898786150, errp=0x56389855dc40 ) at -../system/vl.c:1207 -#13 0x000056389737a6cc in qemu_opts_foreach -     (list=0x563898427b60 , func=0x563896dc48ca -, opaque=0x0, errp=0x56389855dc40 ) -     at ../util/qemu-option.c:1135 -#14 0x0000563896dc89b5 in qemu_create_cli_devices () at -../system/vl.c:2745 -#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 -) at ../system/vl.c:2806 -#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) -at ../system/vl.c:3838 -#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at -../system/main.c:72 -So the attached adjusted version of your patch does seem to help.  At -least I can't reproduce the crash on my stand. -Thanks for the stack trace; the calls to SPICE_RING_INIT in -init_qxl_ram are -definitely harmful.  Try V2 of the patch, attached, which skips the lines -of init_qxl_ram that modify guest memory. -I'm wondering, could it be useful to explicitly mark all the reused -memory regions readonly upon cpr-transfer, and then make them writable -back again after the migration is done?  That way we will be segfaulting -early on instead of debugging tricky memory corruptions. -It's a useful debugging technique, but changing protection on a large -memory region -can be too expensive for production due to TLB shootdowns. -Good point. Though we could move this code under non-default option to -avoid re-writing. 
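As a rough sketch of that suggestion (the names below are invented for illustration; no such knob exists in QEMU today), the read-only behaviour from the earlier qemu_ram_alloc_internal() snippet could be gated so that production cpr never pays for the protection change:

    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>

    /* hypothetical test-only switch, e.g. set by a test harness */
    static bool cpr_ro_debug_enabled(void)
    {
        const char *v = getenv("QEMU_CPR_DEBUG_RO");   /* made-up name */
        return v && !strcmp(v, "1");
    }

    /*
     * In the allocation path, the earlier snippet would then become:
     *
     *     if (reused && cpr_ro_debug_enabled()) {
     *         ram_flags |= RAM_READONLY;
     *     }
     */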
- -Den - -On 3/5/25 11:19 PM, Steven Sistare wrote: -> -On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: -> -> On 3/4/25 9:05 PM, Steven Sistare wrote: -> ->> On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -> ->>> On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -> ->>>> On 2/28/25 8:20 PM, Steven Sistare wrote: -> ->>>>> On 2/28/2025 1:13 PM, Steven Sistare wrote: -> ->>>>>> On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -> ->>>>>>> Hi all, -> ->>>>>>> -> ->>>>>>> We've been experimenting with cpr-transfer migration mode recently -> ->>>>>>> and -> ->>>>>>> have discovered the following issue with the guest QXL driver: -> ->>>>>>> -> ->>>>>>> Run migration source: -> ->>>>>>>> EMULATOR=/path/to/emulator -> ->>>>>>>> ROOTFS=/path/to/image -> ->>>>>>>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>>>>>>> -> ->>>>>>>> $EMULATOR -enable-kvm \ -> ->>>>>>>>        -machine q35 \ -> ->>>>>>>>        -cpu host -smp 2 -m 2G \ -> ->>>>>>>>        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -> ->>>>>>>> dev/shm/ -> ->>>>>>>> ram0,share=on\ -> ->>>>>>>>        -machine memory-backend=ram0 \ -> ->>>>>>>>        -machine aux-ram-share=on \ -> ->>>>>>>>        -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>>>>>>        -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>>>>>>        -nographic \ -> ->>>>>>>>        -device qxl-vga -> ->>>>>>> -> ->>>>>>> Run migration target: -> ->>>>>>>> EMULATOR=/path/to/emulator -> ->>>>>>>> ROOTFS=/path/to/image -> ->>>>>>>> QMPSOCK=/var/run/alma8qmp-dst.sock -> ->>>>>>>> $EMULATOR -enable-kvm \ -> ->>>>>>>>        -machine q35 \ -> ->>>>>>>>        -cpu host -smp 2 -m 2G \ -> ->>>>>>>>        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -> ->>>>>>>> dev/shm/ -> ->>>>>>>> ram0,share=on\ -> ->>>>>>>>        -machine memory-backend=ram0 \ -> ->>>>>>>>        -machine aux-ram-share=on \ -> ->>>>>>>>        -drive file=$ROOTFS,media=disk,if=virtio \ -> ->>>>>>>>        -qmp unix:$QMPSOCK,server=on,wait=off \ -> ->>>>>>>>        -nographic \ -> ->>>>>>>>        -device qxl-vga \ -> ->>>>>>>>        -incoming tcp:0:44444 \ -> ->>>>>>>>        -incoming '{"channel-type": "cpr", "addr": { "transport": -> ->>>>>>>> "socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -> ->>>>>>> -> ->>>>>>> -> ->>>>>>> Launch the migration: -> ->>>>>>>> QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -> ->>>>>>>> QMPSOCK=/var/run/alma8qmp-src.sock -> ->>>>>>>> -> ->>>>>>>> $QMPSHELL -p $QMPSOCK < ->>>>>>>>        migrate-set-parameters mode=cpr-transfer -> ->>>>>>>>        migrate channels=[{"channel-type":"main","addr": -> ->>>>>>>> {"transport":"socket","type":"inet","host":"0","port":"44444"}}, -> ->>>>>>>> {"channel-type":"cpr","addr": -> ->>>>>>>> {"transport":"socket","type":"unix","path":"/var/run/alma8cpr- -> ->>>>>>>> dst.sock"}}] -> ->>>>>>>> EOF -> ->>>>>>> -> ->>>>>>> Then, after a while, QXL guest driver on target crashes spewing the -> ->>>>>>> following messages: -> ->>>>>>>> [   73.962002] [TTM] Buffer eviction failed -> ->>>>>>>> [   73.962072] qxl 0000:00:02.0: object_init failed for (3149824, -> ->>>>>>>> 0x00000001) -> ->>>>>>>> [   73.962081] [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to -> ->>>>>>>> allocate VRAM BO -> ->>>>>>> -> ->>>>>>> That seems to be a known kernel QXL driver bug: -> ->>>>>>> -> ->>>>>>> -https://lore.kernel.org/all/20220907094423.93581-1- -> ->>>>>>> min_halo@163.com/T/ -> ->>>>>>> -https://lore.kernel.org/lkml/ZTgydqRlK6WX_b29@eldamar.lan/ -> ->>>>>>> -> ->>>>>>> (the latter discussion contains that reproduce script 
which -> ->>>>>>> speeds up -> ->>>>>>> the crash in the guest): -> ->>>>>>>> #!/bin/bash -> ->>>>>>>> -> ->>>>>>>> chvt 3 -> ->>>>>>>> -> ->>>>>>>> for j in $(seq 80); do -> ->>>>>>>>            echo "$(date) starting round $j" -> ->>>>>>>>            if [ "$(journalctl --boot | grep "failed to allocate -> ->>>>>>>> VRAM -> ->>>>>>>> BO")" != "" ]; then -> ->>>>>>>>                    echo "bug was reproduced after $j tries" -> ->>>>>>>>                    exit 1 -> ->>>>>>>>            fi -> ->>>>>>>>            for i in $(seq 100); do -> ->>>>>>>>                    dmesg > /dev/tty3 -> ->>>>>>>>            done -> ->>>>>>>> done -> ->>>>>>>> -> ->>>>>>>> echo "bug could not be reproduced" -> ->>>>>>>> exit 0 -> ->>>>>>> -> ->>>>>>> The bug itself seems to remain unfixed, as I was able to reproduce -> ->>>>>>> that -> ->>>>>>> with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -> ->>>>>>> cpr-transfer code also seems to be buggy as it triggers the crash - -> ->>>>>>> without the cpr-transfer migration the above reproduce doesn't -> ->>>>>>> lead to -> ->>>>>>> crash on the source VM. -> ->>>>>>> -> ->>>>>>> I suspect that, as cpr-transfer doesn't migrate the guest -> ->>>>>>> memory, but -> ->>>>>>> rather passes it through the memory backend object, our code might -> ->>>>>>> somehow corrupt the VRAM.  However, I wasn't able to trace the -> ->>>>>>> corruption so far. -> ->>>>>>> -> ->>>>>>> Could somebody help the investigation and take a look into -> ->>>>>>> this?  Any -> ->>>>>>> suggestions would be appreciated.  Thanks! -> ->>>>>> -> ->>>>>> Possibly some memory region created by qxl is not being preserved. -> ->>>>>> Try adding these traces to see what is preserved: -> ->>>>>> -> ->>>>>> -trace enable='*cpr*' -> ->>>>>> -trace enable='*ram_alloc*' -> ->>>>> -> ->>>>> Also try adding this patch to see if it flags any ram blocks as not -> ->>>>> compatible with cpr.  A message is printed at migration start time. 
-> ->>>>>    -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- -> ->>>>> email- -> ->>>>> steven.sistare@oracle.com/ -> ->>>>> -> ->>>>> - Steve -> ->>>>> -> ->>>> -> ->>>> With the traces enabled + the "migration: ram block cpr blockers" -> ->>>> patch -> ->>>> applied: -> ->>>> -> ->>>> Source: -> ->>>>> cpr_find_fd pc.bios, id 0 returns -1 -> ->>>>> cpr_save_fd pc.bios, id 0, fd 22 -> ->>>>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -> ->>>>> 0x7fec18e00000 -> ->>>>> cpr_find_fd pc.rom, id 0 returns -1 -> ->>>>> cpr_save_fd pc.rom, id 0, fd 23 -> ->>>>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -> ->>>>> 0x7fec18c00000 -> ->>>>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -> ->>>>> cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -> ->>>>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -> ->>>>> 262144 fd 24 host 0x7fec18a00000 -> ->>>>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -> ->>>>> cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -> ->>>>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -> ->>>>> 67108864 fd 25 host 0x7feb77e00000 -> ->>>>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -> ->>>>> cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -> ->>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -> ->>>>> fd 27 host 0x7fec18800000 -> ->>>>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -> ->>>>> cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -> ->>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -> ->>>>> 67108864 fd 28 host 0x7feb73c00000 -> ->>>>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -> ->>>>> cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -> ->>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -> ->>>>> fd 34 host 0x7fec18600000 -> ->>>>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -> ->>>>> cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -> ->>>>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -> ->>>>> 2097152 fd 35 host 0x7fec18200000 -> ->>>>> cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -> ->>>>> cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -> ->>>>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -> ->>>>> fd 36 host 0x7feb8b600000 -> ->>>>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -> ->>>>> cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -> ->>>>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -> ->>>>> 37 host 0x7feb8b400000 -> ->>>>> -> ->>>>> cpr_state_save cpr-transfer mode -> ->>>>> cpr_transfer_output /var/run/alma8cpr-dst.sock -> ->>>> -> ->>>> Target: -> ->>>>> cpr_transfer_input /var/run/alma8cpr-dst.sock -> ->>>>> cpr_state_load cpr-transfer mode -> ->>>>> cpr_find_fd pc.bios, id 0 returns 20 -> ->>>>> qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -> ->>>>> 0x7fcdc9800000 -> ->>>>> cpr_find_fd pc.rom, id 0 returns 19 -> ->>>>> qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -> ->>>>> 0x7fcdc9600000 -> ->>>>> cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -> ->>>>> qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -> ->>>>> 262144 fd 18 host 0x7fcdc9400000 -> ->>>>> cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -> ->>>>> qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -> ->>>>> 67108864 fd 17 host 0x7fcd27e00000 -> ->>>>> cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -> ->>>>> 
qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -> ->>>>> fd 16 host 0x7fcdc9200000 -> ->>>>> cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -> ->>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -> ->>>>> 67108864 fd 15 host 0x7fcd23c00000 -> ->>>>> cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -> ->>>>> qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -> ->>>>> fd 14 host 0x7fcdc8800000 -> ->>>>> cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -> ->>>>> qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -> ->>>>> 2097152 fd 13 host 0x7fcdc8400000 -> ->>>>> cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -> ->>>>> qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -> ->>>>> fd 11 host 0x7fcdc8200000 -> ->>>>> cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -> ->>>>> qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -> ->>>>> 10 host 0x7fcd3be00000 -> ->>>> -> ->>>> Looks like both vga.vram and qxl.vram are being preserved (with the -> ->>>> same -> ->>>> addresses), and no incompatible ram blocks are found during migration. -> ->>> -> ->>> Sorry, addressed are not the same, of course.  However corresponding -> ->>> ram -> ->>> blocks do seem to be preserved and initialized. -> ->> -> ->> So far, I have not reproduced the guest driver failure. -> ->> -> ->> However, I have isolated places where new QEMU improperly writes to -> ->> the qxl memory regions prior to starting the guest, by mmap'ing them -> ->> readonly after cpr: -> ->> -> ->>    qemu_ram_alloc_internal() -> ->>      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) -> ->>          ram_flags |= RAM_READONLY; -> ->>      new_block = qemu_ram_alloc_from_fd(...) -> ->> -> ->> I have attached a draft fix; try it and let me know. -> ->> My console window looks fine before and after cpr, using -> ->> -vnc $hostip:0 -vga qxl -> ->> -> ->> - Steve -> -> -> -> Regarding the reproduce: when I launch the buggy version with the same -> -> options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, -> -> my VNC client silently hangs on the target after a while.  Could it -> -> happen on your stand as well? -> -> -cpr does not preserve the vnc connection and session.  To test, I specify -> -port 0 for the source VM and port 1 for the dest.  When the src vnc goes -> -dormant the dest vnc becomes active. -> -Sure, I meant that VNC on the dest (on the port 1) works for a while -after the migration and then hangs, apparently after the guest QXL crash. - -> -> Could you try launching VM with -> -> "-nographic -device qxl-vga"?  That way VM's serial console is given you -> -> directly in the shell, so when qxl driver crashes you're still able to -> -> inspect the kernel messages. -> -> -I have been running like that, but have not reproduced the qxl driver -> -crash, -> -and I suspect my guest image+kernel is too old. -Yes, that's probably the case. But the crash occurs on my Fedora 41 -guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to -be buggy. - - -> -However, once I realized the -> -issue was post-cpr modification of qxl memory, I switched my attention -> -to the -> -fix. -> -> -> As for your patch, I can report that it doesn't resolve the issue as it -> -> is.  But I was able to track down another possible memory corruption -> -> using your approach with readonly mmap'ing: -> -> -> ->> Program terminated with signal SIGSEGV, Segmentation fault. 
-> ->> #0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -> ->> 412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); -> ->> [Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] -> ->> (gdb) bt -> ->> #0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -> ->> #1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, -> ->> errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 -> ->> #2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, -> ->> errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 -> ->> #3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, -> ->> errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 -> ->> #4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, -> ->> value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 -> ->> #5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, -> ->> v=0x5638996f3770, name=0x56389759b141 "realized", -> ->> opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) -> ->>      at ../qom/object.c:2374 -> ->> #6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, -> ->> name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) -> ->>      at ../qom/object.c:1449 -> ->> #7  0x00005638970f8586 in object_property_set_qobject -> ->> (obj=0x5638996e0e70, name=0x56389759b141 "realized", -> ->> value=0x5638996df900, errp=0x7ffd3c2b84e0) -> ->>      at ../qom/qom-qobject.c:28 -> ->> #8  0x00005638970f3d8d in object_property_set_bool -> ->> (obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, -> ->> errp=0x7ffd3c2b84e0) -> ->>      at ../qom/object.c:1519 -> ->> #9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, -> ->> bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 -> ->> #10 0x0000563896dba675 in qdev_device_add_from_qdict -> ->> (opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ -> ->> system/qdev-monitor.c:714 -> ->> #11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, -> ->> errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 -> ->> #12 0x0000563896dc48f1 in device_init_func (opaque=0x0, -> ->> opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ -> ->> vl.c:1207 -> ->> #13 0x000056389737a6cc in qemu_opts_foreach -> ->>      (list=0x563898427b60 , func=0x563896dc48ca -> ->> , opaque=0x0, errp=0x56389855dc40 ) -> ->>      at ../util/qemu-option.c:1135 -> ->> #14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ -> ->> vl.c:2745 -> ->> #15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 -> ->> ) at ../system/vl.c:2806 -> ->> #16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) -> ->> at ../system/vl.c:3838 -> ->> #17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ -> ->> system/main.c:72 -> -> -> -> So the attached adjusted version of your patch does seem to help.  At -> -> least I can't reproduce the crash on my stand. -> -> -Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram -> -are -> -definitely harmful.  Try V2 of the patch, attached, which skips the lines -> -of init_qxl_ram that modify guest memory. -> -Thanks, your v2 patch does seem to prevent the crash. Would you re-send -it to the list as a proper fix? - -> -> I'm wondering, could it be useful to explicitly mark all the reused -> -> memory regions readonly upon cpr-transfer, and then make them writable -> -> back again after the migration is done?  That way we will be segfaulting -> -> early on instead of debugging tricky memory corruptions. 
-> -> -It's a useful debugging technique, but changing protection on a large -> -memory region -> -can be too expensive for production due to TLB shootdowns. -> -> -Also, there are cases where writes are performed but the value is -> -guaranteed to -> -be the same: -> -  qxl_post_load() -> -    qxl_set_mode() -> -      d->rom->mode = cpu_to_le32(modenr); -> -The value is the same because mode and shadow_rom.mode were passed in -> -vmstate -> -from old qemu. -> -There're also cases where devices' ROM might be re-initialized. E.g. -this segfault occures upon further exploration of RO mapped RAM blocks: - -> -Program terminated with signal SIGSEGV, Segmentation fault. -> -#0 __memmove_avx_unaligned_erms () at -> -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -> -664 rep movsb -> -[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] -> -(gdb) bt -> -#0 __memmove_avx_unaligned_erms () at -> -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -> -#1 0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, -> -owner=0x55aa2019ac10, name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) -> -at ../hw/core/loader.c:1032 -> -#2 0x000055aa1d031577 in rom_add_blob -> -(name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, -> -max_len=2097152, addr=18446744073709551615, fw_file_name=0x55aa1da51f13 -> -"etc/acpi/tables", fw_callback=0x55aa1d441f59 , -> -callback_opaque=0x55aa20ff0010, as=0x0, read_only=true) at -> -../hw/core/loader.c:1147 -> -#3 0x000055aa1cfd788d in acpi_add_rom_blob -> -(update=0x55aa1d441f59 , opaque=0x55aa20ff0010, -> -blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at -> -../hw/acpi/utils.c:46 -> -#4 0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 -> -#5 0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) -> -at ../hw/i386/pc.c:638 -> -#6 0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 -> -, data=0x0) at ../util/notify.c:39 -> -#7 0x000055aa1d039ee5 in qdev_machine_creation_done () at -> -../hw/core/machine.c:1749 -> -#8 0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 -> -) at ../system/vl.c:2779 -> -#9 0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 -> -) at ../system/vl.c:2807 -> -#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at -> -../system/vl.c:3838 -> -#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at -> -../system/main.c:72 -I'm not sure whether ACPI tables ROM in particular is rewritten with the -same content, but there might be cases where ROM can be read from file -system upon initialization. That is undesirable as guest kernel -certainly won't be too happy about sudden change of the device's ROM -content. - -So the issue we're dealing with here is any unwanted memory related -device initialization upon cpr. - -For now the only thing that comes to my mind is to make a test where we -put as many devices as we can into a VM, make ram blocks RO upon cpr -(and remap them as RW later after migration is done, if needed), and -catch any unwanted memory violations. As Den suggested, we might -consider adding that behaviour as a separate non-default option (or -"migrate" command flag specific to cpr-transfer), which would only be -used in the testing. 
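The "catch any unwanted memory violations" part of such a test could look roughly like the following (standalone illustration, not QEMU code): a SIGSEGV handler attributes the faulting address to the protected block, so a stray write is reported at the offending spot instead of surfacing later as guest-visible corruption.

    /* catcher.c -- report writes into a block made read-only "upon cpr" */
    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static unsigned char *blk;
    static size_t blk_len = 4096;

    static void segv_handler(int sig, siginfo_t *si, void *ctx)
    {
        (void)sig; (void)ctx;
        unsigned char *addr = si->si_addr;
        if (addr >= blk && addr < blk + blk_len) {
            /* fprintf is not async-signal-safe; acceptable in a test harness */
            fprintf(stderr, "write violation at offset %ld in protected block\n",
                    (long)(addr - blk));
        }
        _exit(1);
    }

    int main(void)
    {
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = segv_handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGSEGV, &sa, NULL);

        blk = mmap(NULL, blk_len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (blk == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        memset(blk, 0, blk_len);
        if (mprotect(blk, blk_len, PROT_READ) != 0) {  /* "RO upon cpr" */
            perror("mprotect");
            return 1;
        }

        blk[128] = 1;                       /* simulated stray device write */
        return 0;                           /* not reached */
    }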
- -Andrey - -On 3/6/25 16:16, Andrey Drobyshev wrote: -On 3/5/25 11:19 PM, Steven Sistare wrote: -On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: -On 3/4/25 9:05 PM, Steven Sistare wrote: -On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -On 2/28/25 8:20 PM, Steven Sistare wrote: -On 2/28/2025 1:13 PM, Steven Sistare wrote: -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently -and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ -        -machine q35 \ -        -cpu host -smp 2 -m 2G \ -        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -dev/shm/ -ram0,share=on\ -        -machine memory-backend=ram0 \ -        -machine aux-ram-share=on \ -        -drive file=$ROOTFS,media=disk,if=virtio \ -        -qmp unix:$QMPSOCK,server=on,wait=off \ -        -nographic \ -        -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ -        -machine q35 \ -        -cpu host -smp 2 -m 2G \ -        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -dev/shm/ -ram0,share=on\ -        -machine memory-backend=ram0 \ -        -machine aux-ram-share=on \ -        -drive file=$ROOTFS,media=disk,if=virtio \ -        -qmp unix:$QMPSOCK,server=on,wait=off \ -        -nographic \ -        -device qxl-vga \ -        -incoming tcp:0:44444 \ -        -incoming '{"channel-type": "cpr", "addr": { "transport": -"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 -            done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce -that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't -lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest -memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM.  However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into -this?  Any -suggestions would be appreciated.  Thanks! -Possibly some memory region created by qxl is not being preserved. -Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' -Also try adding this patch to see if it flags any ram blocks as not -compatible with cpr.  A message is printed at migration start time. 
-    -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- -email- -steven.sistare@oracle.com/ - -- Steve -With the traces enabled + the "migration: ram block cpr blockers" -patch -applied: - -Source: -cpr_find_fd pc.bios, id 0 returns -1 -cpr_save_fd pc.bios, id 0, fd 22 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -0x7fec18e00000 -cpr_find_fd pc.rom, id 0 returns -1 -cpr_save_fd pc.rom, id 0, fd 23 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -0x7fec18c00000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 24 host 0x7fec18a00000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 25 host 0x7feb77e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 27 host 0x7fec18800000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 28 host 0x7feb73c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 34 host 0x7fec18600000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 35 host 0x7fec18200000 -cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -fd 36 host 0x7feb8b600000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -37 host 0x7feb8b400000 - -cpr_state_save cpr-transfer mode -cpr_transfer_output /var/run/alma8cpr-dst.sock -Target: -cpr_transfer_input /var/run/alma8cpr-dst.sock -cpr_state_load cpr-transfer mode -cpr_find_fd pc.bios, id 0 returns 20 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -0x7fcdc9800000 -cpr_find_fd pc.rom, id 0 returns 19 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -0x7fcdc9600000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 18 host 0x7fcdc9400000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 17 host 0x7fcd27e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 16 host 0x7fcdc9200000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 15 host 0x7fcd23c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 14 host 0x7fcdc8800000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 13 host 0x7fcdc8400000 -cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 -fd 11 host 0x7fcdc8200000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -10 host 0x7fcd3be00000 -Looks like both vga.vram and qxl.vram are being preserved (with the -same -addresses), and no incompatible ram blocks are found during migration. -Sorry, addressed are not the same, of course.  However corresponding -ram -blocks do seem to be preserved and initialized. -So far, I have not reproduced the guest driver failure. - -However, I have isolated places where new QEMU improperly writes to -the qxl memory regions prior to starting the guest, by mmap'ing them -readonly after cpr: - -    qemu_ram_alloc_internal() -      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) -          ram_flags |= RAM_READONLY; -      new_block = qemu_ram_alloc_from_fd(...) - -I have attached a draft fix; try it and let me know. -My console window looks fine before and after cpr, using --vnc $hostip:0 -vga qxl - -- Steve -Regarding the reproduce: when I launch the buggy version with the same -options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, -my VNC client silently hangs on the target after a while.  Could it -happen on your stand as well? -cpr does not preserve the vnc connection and session.  To test, I specify -port 0 for the source VM and port 1 for the dest.  When the src vnc goes -dormant the dest vnc becomes active. -Sure, I meant that VNC on the dest (on the port 1) works for a while -after the migration and then hangs, apparently after the guest QXL crash. -Could you try launching VM with -"-nographic -device qxl-vga"?  That way VM's serial console is given you -directly in the shell, so when qxl driver crashes you're still able to -inspect the kernel messages. -I have been running like that, but have not reproduced the qxl driver -crash, -and I suspect my guest image+kernel is too old. -Yes, that's probably the case. But the crash occurs on my Fedora 41 -guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to -be buggy. -However, once I realized the -issue was post-cpr modification of qxl memory, I switched my attention -to the -fix. -As for your patch, I can report that it doesn't resolve the issue as it -is.  But I was able to track down another possible memory corruption -using your approach with readonly mmap'ing: -Program terminated with signal SIGSEGV, Segmentation fault. 
-#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); -[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] -(gdb) bt -#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, -errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 -#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, -errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 -#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, -errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 -#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, -value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 -#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, -v=0x5638996f3770, name=0x56389759b141 "realized", -opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) -      at ../qom/object.c:2374 -#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, -name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) -      at ../qom/object.c:1449 -#7  0x00005638970f8586 in object_property_set_qobject -(obj=0x5638996e0e70, name=0x56389759b141 "realized", -value=0x5638996df900, errp=0x7ffd3c2b84e0) -      at ../qom/qom-qobject.c:28 -#8  0x00005638970f3d8d in object_property_set_bool -(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, -errp=0x7ffd3c2b84e0) -      at ../qom/object.c:1519 -#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, -bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 -#10 0x0000563896dba675 in qdev_device_add_from_qdict -(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ -system/qdev-monitor.c:714 -#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, -errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 -#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, -opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ -vl.c:1207 -#13 0x000056389737a6cc in qemu_opts_foreach -      (list=0x563898427b60 , func=0x563896dc48ca -, opaque=0x0, errp=0x56389855dc40 ) -      at ../util/qemu-option.c:1135 -#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ -vl.c:2745 -#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 -) at ../system/vl.c:2806 -#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) -at ../system/vl.c:3838 -#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ -system/main.c:72 -So the attached adjusted version of your patch does seem to help.  At -least I can't reproduce the crash on my stand. -Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram -are -definitely harmful.  Try V2 of the patch, attached, which skips the lines -of init_qxl_ram that modify guest memory. -Thanks, your v2 patch does seem to prevent the crash. Would you re-send -it to the list as a proper fix? -I'm wondering, could it be useful to explicitly mark all the reused -memory regions readonly upon cpr-transfer, and then make them writable -back again after the migration is done?  That way we will be segfaulting -early on instead of debugging tricky memory corruptions. -It's a useful debugging technique, but changing protection on a large -memory region -can be too expensive for production due to TLB shootdowns. 
- -Also, there are cases where writes are performed but the value is -guaranteed to -be the same: -   qxl_post_load() -     qxl_set_mode() -       d->rom->mode = cpu_to_le32(modenr); -The value is the same because mode and shadow_rom.mode were passed in -vmstate -from old qemu. -There're also cases where devices' ROM might be re-initialized. E.g. -this segfault occures upon further exploration of RO mapped RAM blocks: -Program terminated with signal SIGSEGV, Segmentation fault. -#0 __memmove_avx_unaligned_erms () at -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -664 rep movsb -[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] -(gdb) bt -#0 __memmove_avx_unaligned_erms () at -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -#1 0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, owner=0x55aa2019ac10, -name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) - at ../hw/core/loader.c:1032 -#2 0x000055aa1d031577 in rom_add_blob - (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, max_len=2097152, -addr=18446744073709551615, fw_file_name=0x55aa1da51f13 "etc/acpi/tables", -fw_callback=0x55aa1d441f59 , callback_opaque=0x55aa20ff0010, as=0x0, -read_only=true) at ../hw/core/loader.c:1147 -#3 0x000055aa1cfd788d in acpi_add_rom_blob - (update=0x55aa1d441f59 , opaque=0x55aa20ff0010, -blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at ../hw/acpi/utils.c:46 -#4 0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 -#5 0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) -at ../hw/i386/pc.c:638 -#6 0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 -, data=0x0) at ../util/notify.c:39 -#7 0x000055aa1d039ee5 in qdev_machine_creation_done () at -../hw/core/machine.c:1749 -#8 0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 -) at ../system/vl.c:2779 -#9 0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 -) at ../system/vl.c:2807 -#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at -../system/vl.c:3838 -#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at -../system/main.c:72 -I'm not sure whether ACPI tables ROM in particular is rewritten with the -same content, but there might be cases where ROM can be read from file -system upon initialization. That is undesirable as guest kernel -certainly won't be too happy about sudden change of the device's ROM -content. - -So the issue we're dealing with here is any unwanted memory related -device initialization upon cpr. - -For now the only thing that comes to my mind is to make a test where we -put as many devices as we can into a VM, make ram blocks RO upon cpr -(and remap them as RW later after migration is done, if needed), and -catch any unwanted memory violations. As Den suggested, we might -consider adding that behaviour as a separate non-default option (or -"migrate" command flag specific to cpr-transfer), which would only be -used in the testing. - -Andrey -No way. ACPI with the source must be used in the same way as BIOSes -and optional ROMs. - -Den - -On 3/6/2025 10:52 AM, Denis V. 
Lunev wrote: -On 3/6/25 16:16, Andrey Drobyshev wrote: -On 3/5/25 11:19 PM, Steven Sistare wrote: -On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: -On 3/4/25 9:05 PM, Steven Sistare wrote: -On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -On 2/28/25 8:20 PM, Steven Sistare wrote: -On 2/28/2025 1:13 PM, Steven Sistare wrote: -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently -and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ -        -machine q35 \ -        -cpu host -smp 2 -m 2G \ -        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -dev/shm/ -ram0,share=on\ -        -machine memory-backend=ram0 \ -        -machine aux-ram-share=on \ -        -drive file=$ROOTFS,media=disk,if=virtio \ -        -qmp unix:$QMPSOCK,server=on,wait=off \ -        -nographic \ -        -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ -        -machine q35 \ -        -cpu host -smp 2 -m 2G \ -        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -dev/shm/ -ram0,share=on\ -        -machine memory-backend=ram0 \ -        -machine aux-ram-share=on \ -        -drive file=$ROOTFS,media=disk,if=virtio \ -        -qmp unix:$QMPSOCK,server=on,wait=off \ -        -nographic \ -        -device qxl-vga \ -        -incoming tcp:0:44444 \ -        -incoming '{"channel-type": "cpr", "addr": { "transport": -"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 -            done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce -that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't -lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest -memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM.  However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into -this?  Any -suggestions would be appreciated.  Thanks! -Possibly some memory region created by qxl is not being preserved. -Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' -Also try adding this patch to see if it flags any ram blocks as not -compatible with cpr.  A message is printed at migration start time. 
-    -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- -email- -steven.sistare@oracle.com/ - -- Steve -With the traces enabled + the "migration: ram block cpr blockers" -patch -applied: - -Source: -cpr_find_fd pc.bios, id 0 returns -1 -cpr_save_fd pc.bios, id 0, fd 22 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -0x7fec18e00000 -cpr_find_fd pc.rom, id 0 returns -1 -cpr_save_fd pc.rom, id 0, fd 23 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -0x7fec18c00000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 24 host 0x7fec18a00000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 25 host 0x7feb77e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 27 host 0x7fec18800000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 28 host 0x7feb73c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 34 host 0x7fec18600000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 35 host 0x7fec18200000 -cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -fd 36 host 0x7feb8b600000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -37 host 0x7feb8b400000 - -cpr_state_save cpr-transfer mode -cpr_transfer_output /var/run/alma8cpr-dst.sock -Target: -cpr_transfer_input /var/run/alma8cpr-dst.sock -cpr_state_load cpr-transfer mode -cpr_find_fd pc.bios, id 0 returns 20 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -0x7fcdc9800000 -cpr_find_fd pc.rom, id 0 returns 19 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -0x7fcdc9600000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 18 host 0x7fcdc9400000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 17 host 0x7fcd27e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 16 host 0x7fcdc9200000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 15 host 0x7fcd23c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 14 host 0x7fcdc8800000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 13 host 0x7fcdc8400000 -cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 -fd 11 host 0x7fcdc8200000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -10 host 0x7fcd3be00000 -Looks like both vga.vram and qxl.vram are being preserved (with the -same -addresses), and no incompatible ram blocks are found during migration. -Sorry, addressed are not the same, of course.  However corresponding -ram -blocks do seem to be preserved and initialized. -So far, I have not reproduced the guest driver failure. - -However, I have isolated places where new QEMU improperly writes to -the qxl memory regions prior to starting the guest, by mmap'ing them -readonly after cpr: - -    qemu_ram_alloc_internal() -      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) -          ram_flags |= RAM_READONLY; -      new_block = qemu_ram_alloc_from_fd(...) - -I have attached a draft fix; try it and let me know. -My console window looks fine before and after cpr, using --vnc $hostip:0 -vga qxl - -- Steve -Regarding the reproduce: when I launch the buggy version with the same -options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, -my VNC client silently hangs on the target after a while.  Could it -happen on your stand as well? -cpr does not preserve the vnc connection and session.  To test, I specify -port 0 for the source VM and port 1 for the dest.  When the src vnc goes -dormant the dest vnc becomes active. -Sure, I meant that VNC on the dest (on the port 1) works for a while -after the migration and then hangs, apparently after the guest QXL crash. -Could you try launching VM with -"-nographic -device qxl-vga"?  That way VM's serial console is given you -directly in the shell, so when qxl driver crashes you're still able to -inspect the kernel messages. -I have been running like that, but have not reproduced the qxl driver -crash, -and I suspect my guest image+kernel is too old. -Yes, that's probably the case.  But the crash occurs on my Fedora 41 -guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to -be buggy. -However, once I realized the -issue was post-cpr modification of qxl memory, I switched my attention -to the -fix. -As for your patch, I can report that it doesn't resolve the issue as it -is.  But I was able to track down another possible memory corruption -using your approach with readonly mmap'ing: -Program terminated with signal SIGSEGV, Segmentation fault. 
-#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); -[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] -(gdb) bt -#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, -errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 -#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, -errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 -#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, -errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 -#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, -value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 -#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, -v=0x5638996f3770, name=0x56389759b141 "realized", -opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) -      at ../qom/object.c:2374 -#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, -name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) -      at ../qom/object.c:1449 -#7  0x00005638970f8586 in object_property_set_qobject -(obj=0x5638996e0e70, name=0x56389759b141 "realized", -value=0x5638996df900, errp=0x7ffd3c2b84e0) -      at ../qom/qom-qobject.c:28 -#8  0x00005638970f3d8d in object_property_set_bool -(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, -errp=0x7ffd3c2b84e0) -      at ../qom/object.c:1519 -#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, -bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 -#10 0x0000563896dba675 in qdev_device_add_from_qdict -(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ -system/qdev-monitor.c:714 -#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, -errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 -#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, -opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ -vl.c:1207 -#13 0x000056389737a6cc in qemu_opts_foreach -      (list=0x563898427b60 , func=0x563896dc48ca -, opaque=0x0, errp=0x56389855dc40 ) -      at ../util/qemu-option.c:1135 -#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ -vl.c:2745 -#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 -) at ../system/vl.c:2806 -#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) -at ../system/vl.c:3838 -#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ -system/main.c:72 -So the attached adjusted version of your patch does seem to help.  At -least I can't reproduce the crash on my stand. -Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram -are -definitely harmful.  Try V2 of the patch, attached, which skips the lines -of init_qxl_ram that modify guest memory. -Thanks, your v2 patch does seem to prevent the crash.  Would you re-send -it to the list as a proper fix? -Yes. Was waiting for your confirmation. -I'm wondering, could it be useful to explicitly mark all the reused -memory regions readonly upon cpr-transfer, and then make them writable -back again after the migration is done?  That way we will be segfaulting -early on instead of debugging tricky memory corruptions. -It's a useful debugging technique, but changing protection on a large -memory region -can be too expensive for production due to TLB shootdowns. 
- -Also, there are cases where writes are performed but the value is -guaranteed to -be the same: -   qxl_post_load() -     qxl_set_mode() -       d->rom->mode = cpu_to_le32(modenr); -The value is the same because mode and shadow_rom.mode were passed in -vmstate -from old qemu. -There're also cases where devices' ROM might be re-initialized.  E.g. -this segfault occures upon further exploration of RO mapped RAM blocks: -Program terminated with signal SIGSEGV, Segmentation fault. -#0  __memmove_avx_unaligned_erms () at -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -664             rep     movsb -[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] -(gdb) bt -#0  __memmove_avx_unaligned_erms () at -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -#1  0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, owner=0x55aa2019ac10, -name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) -     at ../hw/core/loader.c:1032 -#2  0x000055aa1d031577 in rom_add_blob -     (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, max_len=2097152, -addr=18446744073709551615, fw_file_name=0x55aa1da51f13 "etc/acpi/tables", -fw_callback=0x55aa1d441f59 , callback_opaque=0x55aa20ff0010, as=0x0, -read_only=true) at ../hw/core/loader.c:1147 -#3  0x000055aa1cfd788d in acpi_add_rom_blob -     (update=0x55aa1d441f59 , opaque=0x55aa20ff0010, -blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at ../hw/acpi/utils.c:46 -#4  0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 -#5  0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) -at ../hw/i386/pc.c:638 -#6  0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 -, data=0x0) at ../util/notify.c:39 -#7  0x000055aa1d039ee5 in qdev_machine_creation_done () at -../hw/core/machine.c:1749 -#8  0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 -) at ../system/vl.c:2779 -#9  0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 -) at ../system/vl.c:2807 -#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at -../system/vl.c:3838 -#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at -../system/main.c:72 -I'm not sure whether ACPI tables ROM in particular is rewritten with the -same content, but there might be cases where ROM can be read from file -system upon initialization.  That is undesirable as guest kernel -certainly won't be too happy about sudden change of the device's ROM -content. - -So the issue we're dealing with here is any unwanted memory related -device initialization upon cpr. - -For now the only thing that comes to my mind is to make a test where we -put as many devices as we can into a VM, make ram blocks RO upon cpr -(and remap them as RW later after migration is done, if needed), and -catch any unwanted memory violations.  As Den suggested, we might -consider adding that behaviour as a separate non-default option (or -"migrate" command flag specific to cpr-transfer), which would only be -used in the testing. -I'll look into adding an option, but there may be too many false positives, -such as the qxl_set_mode case above. And the maintainers may object to me -eliminating the false positives by adding more CPR_IN tests, due to gratuitous -(from their POV) ugliness. - -But I will use the technique to look for more write violations. -Andrey -No way. ACPI with the source must be used in the same way as BIOSes -and optional ROMs. -Yup, its a bug. Will fix. 
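A minimal standalone sketch of the read-only trapping technique discussed above, for readers who want to experiment with it outside of QEMU: map a shared, fd-backed region PROT_READ first so that any stray write faults with SIGSEGV instead of silently corrupting preserved guest memory, then restore PROT_WRITE once initialization is over. This is plain POSIX code, not the actual QEMU patch; the /dev/shm path and size are made up for illustration, and the same protection flip is also why the thread mentions TLB-shootdown cost when doing this on large regions in production.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        /* Hypothetical fd-backed region standing in for a preserved RAM block. */
        size_t len = 4096;
        int fd = open("/dev/shm/ram0-example", O_RDWR | O_CREAT, 0600);
        if (fd < 0 || ftruncate(fd, len) < 0) {
            perror("setup");
            return 1;
        }

        /* Map read-only first: any unwanted write during device
         * re-initialization now raises SIGSEGV instead of silently
         * corrupting the preserved contents. */
        void *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }

        /* ... device realize / incoming migration would run here ... */

        /* Once it is safe, restore write access in place. */
        if (mprotect(p, len, PROT_READ | PROT_WRITE) < 0) {
            perror("mprotect");
            return 1;
        }
        memset(p, 0, len);  /* writes are legal again */

        munmap(p, len);
        close(fd);
        return 0;
    }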
- -- Steve - -see -1741380954-341079-1-git-send-email-steven.sistare@oracle.com -/">https://lore.kernel.org/qemu-devel/ -1741380954-341079-1-git-send-email-steven.sistare@oracle.com -/ -- Steve - -On 3/6/2025 11:13 AM, Steven Sistare wrote: -On 3/6/2025 10:52 AM, Denis V. Lunev wrote: -On 3/6/25 16:16, Andrey Drobyshev wrote: -On 3/5/25 11:19 PM, Steven Sistare wrote: -On 3/5/2025 11:50 AM, Andrey Drobyshev wrote: -On 3/4/25 9:05 PM, Steven Sistare wrote: -On 2/28/2025 1:37 PM, Andrey Drobyshev wrote: -On 2/28/25 8:35 PM, Andrey Drobyshev wrote: -On 2/28/25 8:20 PM, Steven Sistare wrote: -On 2/28/2025 1:13 PM, Steven Sistare wrote: -On 2/28/2025 12:39 PM, Andrey Drobyshev wrote: -Hi all, - -We've been experimenting with cpr-transfer migration mode recently -and -have discovered the following issue with the guest QXL driver: - -Run migration source: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-src.sock - -$EMULATOR -enable-kvm \ -        -machine q35 \ -        -cpu host -smp 2 -m 2G \ -        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -dev/shm/ -ram0,share=on\ -        -machine memory-backend=ram0 \ -        -machine aux-ram-share=on \ -        -drive file=$ROOTFS,media=disk,if=virtio \ -        -qmp unix:$QMPSOCK,server=on,wait=off \ -        -nographic \ -        -device qxl-vga -Run migration target: -EMULATOR=/path/to/emulator -ROOTFS=/path/to/image -QMPSOCK=/var/run/alma8qmp-dst.sock -$EMULATOR -enable-kvm \ -        -machine q35 \ -        -cpu host -smp 2 -m 2G \ -        -object memory-backend-file,id=ram0,size=2G,mem-path=/ -dev/shm/ -ram0,share=on\ -        -machine memory-backend=ram0 \ -        -machine aux-ram-share=on \ -        -drive file=$ROOTFS,media=disk,if=virtio \ -        -qmp unix:$QMPSOCK,server=on,wait=off \ -        -nographic \ -        -device qxl-vga \ -        -incoming tcp:0:44444 \ -        -incoming '{"channel-type": "cpr", "addr": { "transport": -"socket", "type": "unix", "path": "/var/run/alma8cpr-dst.sock"}}' -Launch the migration: -QMPSHELL=/root/src/qemu/master/scripts/qmp/qmp-shell -QMPSOCK=/var/run/alma8qmp-src.sock - -$QMPSHELL -p $QMPSOCK < /dev/tty3 -            done -done - -echo "bug could not be reproduced" -exit 0 -The bug itself seems to remain unfixed, as I was able to reproduce -that -with Fedora 41 guest, as well as AlmaLinux 8 guest. However our -cpr-transfer code also seems to be buggy as it triggers the crash - -without the cpr-transfer migration the above reproduce doesn't -lead to -crash on the source VM. - -I suspect that, as cpr-transfer doesn't migrate the guest -memory, but -rather passes it through the memory backend object, our code might -somehow corrupt the VRAM.  However, I wasn't able to trace the -corruption so far. - -Could somebody help the investigation and take a look into -this?  Any -suggestions would be appreciated.  Thanks! -Possibly some memory region created by qxl is not being preserved. -Try adding these traces to see what is preserved: - --trace enable='*cpr*' --trace enable='*ram_alloc*' -Also try adding this patch to see if it flags any ram blocks as not -compatible with cpr.  A message is printed at migration start time. 
-    -https://lore.kernel.org/qemu-devel/1740667681-257312-1-git-send- -email- -steven.sistare@oracle.com/ - -- Steve -With the traces enabled + the "migration: ram block cpr blockers" -patch -applied: - -Source: -cpr_find_fd pc.bios, id 0 returns -1 -cpr_save_fd pc.bios, id 0, fd 22 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 22 host -0x7fec18e00000 -cpr_find_fd pc.rom, id 0 returns -1 -cpr_save_fd pc.rom, id 0, fd 23 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 23 host -0x7fec18c00000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns -1 -cpr_save_fd 0000:00:01.0/e1000e.rom, id 0, fd 24 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 24 host 0x7fec18a00000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/vga.vram, id 0, fd 25 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 25 host 0x7feb77e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vrom, id 0, fd 27 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 27 host 0x7fec18800000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.vram, id 0, fd 28 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 28 host 0x7feb73c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns -1 -cpr_save_fd 0000:00:02.0/qxl.rom, id 0, fd 34 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 34 host 0x7fec18600000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/tables, id 0, fd 35 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 35 host 0x7fec18200000 -cpr_find_fd /rom@etc/table-loader, id 0 returns -1 -cpr_save_fd /rom@etc/table-loader, id 0, fd 36 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 max_size 65536 -fd 36 host 0x7feb8b600000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns -1 -cpr_save_fd /rom@etc/acpi/rsdp, id 0, fd 37 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -37 host 0x7feb8b400000 - -cpr_state_save cpr-transfer mode -cpr_transfer_output /var/run/alma8cpr-dst.sock -Target: -cpr_transfer_input /var/run/alma8cpr-dst.sock -cpr_state_load cpr-transfer mode -cpr_find_fd pc.bios, id 0 returns 20 -qemu_ram_alloc_shared pc.bios size 262144 max_size 262144 fd 20 host -0x7fcdc9800000 -cpr_find_fd pc.rom, id 0 returns 19 -qemu_ram_alloc_shared pc.rom size 131072 max_size 131072 fd 19 host -0x7fcdc9600000 -cpr_find_fd 0000:00:01.0/e1000e.rom, id 0 returns 18 -qemu_ram_alloc_shared 0000:00:01.0/e1000e.rom size 262144 max_size -262144 fd 18 host 0x7fcdc9400000 -cpr_find_fd 0000:00:02.0/vga.vram, id 0 returns 17 -qemu_ram_alloc_shared 0000:00:02.0/vga.vram size 67108864 max_size -67108864 fd 17 host 0x7fcd27e00000 -cpr_find_fd 0000:00:02.0/qxl.vrom, id 0 returns 16 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vrom size 8192 max_size 8192 -fd 16 host 0x7fcdc9200000 -cpr_find_fd 0000:00:02.0/qxl.vram, id 0 returns 15 -qemu_ram_alloc_shared 0000:00:02.0/qxl.vram size 67108864 max_size -67108864 fd 15 host 0x7fcd23c00000 -cpr_find_fd 0000:00:02.0/qxl.rom, id 0 returns 14 -qemu_ram_alloc_shared 0000:00:02.0/qxl.rom size 65536 max_size 65536 -fd 14 host 0x7fcdc8800000 -cpr_find_fd /rom@etc/acpi/tables, id 0 returns 13 -qemu_ram_alloc_shared /rom@etc/acpi/tables size 131072 max_size -2097152 fd 13 host 0x7fcdc8400000 -cpr_find_fd /rom@etc/table-loader, id 0 returns 11 -qemu_ram_alloc_shared /rom@etc/table-loader size 4096 
max_size 65536 -fd 11 host 0x7fcdc8200000 -cpr_find_fd /rom@etc/acpi/rsdp, id 0 returns 10 -qemu_ram_alloc_shared /rom@etc/acpi/rsdp size 4096 max_size 4096 fd -10 host 0x7fcd3be00000 -Looks like both vga.vram and qxl.vram are being preserved (with the -same -addresses), and no incompatible ram blocks are found during migration. -Sorry, addressed are not the same, of course.  However corresponding -ram -blocks do seem to be preserved and initialized. -So far, I have not reproduced the guest driver failure. - -However, I have isolated places where new QEMU improperly writes to -the qxl memory regions prior to starting the guest, by mmap'ing them -readonly after cpr: - -    qemu_ram_alloc_internal() -      if (reused && (strstr(name, "qxl") || strstr("name", "vga"))) -          ram_flags |= RAM_READONLY; -      new_block = qemu_ram_alloc_from_fd(...) - -I have attached a draft fix; try it and let me know. -My console window looks fine before and after cpr, using --vnc $hostip:0 -vga qxl - -- Steve -Regarding the reproduce: when I launch the buggy version with the same -options as you, i.e. "-vnc 0.0.0.0:$port -vga qxl", and do cpr-transfer, -my VNC client silently hangs on the target after a while.  Could it -happen on your stand as well? -cpr does not preserve the vnc connection and session.  To test, I specify -port 0 for the source VM and port 1 for the dest.  When the src vnc goes -dormant the dest vnc becomes active. -Sure, I meant that VNC on the dest (on the port 1) works for a while -after the migration and then hangs, apparently after the guest QXL crash. -Could you try launching VM with -"-nographic -device qxl-vga"?  That way VM's serial console is given you -directly in the shell, so when qxl driver crashes you're still able to -inspect the kernel messages. -I have been running like that, but have not reproduced the qxl driver -crash, -and I suspect my guest image+kernel is too old. -Yes, that's probably the case.  But the crash occurs on my Fedora 41 -guest with the 6.11.5-300.fc41.x86_64 kernel, so newer kernels seem to -be buggy. -However, once I realized the -issue was post-cpr modification of qxl memory, I switched my attention -to the -fix. -As for your patch, I can report that it doesn't resolve the issue as it -is.  But I was able to track down another possible memory corruption -using your approach with readonly mmap'ing: -Program terminated with signal SIGSEGV, Segmentation fault. 
-#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -412         d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC); -[Current thread is 1 (Thread 0x7f1a4f83b480 (LWP 229798))] -(gdb) bt -#0  init_qxl_ram (d=0x5638996e0e70) at ../hw/display/qxl.c:412 -#1  0x0000563896e7f467 in qxl_realize_common (qxl=0x5638996e0e70, -errp=0x7ffd3c2b8170) at ../hw/display/qxl.c:2142 -#2  0x0000563896e7fda1 in qxl_realize_primary (dev=0x5638996e0e70, -errp=0x7ffd3c2b81d0) at ../hw/display/qxl.c:2257 -#3  0x0000563896c7e8f2 in pci_qdev_realize (qdev=0x5638996e0e70, -errp=0x7ffd3c2b8250) at ../hw/pci/pci.c:2174 -#4  0x00005638970eb54b in device_set_realized (obj=0x5638996e0e70, -value=true, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:494 -#5  0x00005638970f5e14 in property_set_bool (obj=0x5638996e0e70, -v=0x5638996f3770, name=0x56389759b141 "realized", -opaque=0x5638987893d0, errp=0x7ffd3c2b84e0) -      at ../qom/object.c:2374 -#6  0x00005638970f39f8 in object_property_set (obj=0x5638996e0e70, -name=0x56389759b141 "realized", v=0x5638996f3770, errp=0x7ffd3c2b84e0) -      at ../qom/object.c:1449 -#7  0x00005638970f8586 in object_property_set_qobject -(obj=0x5638996e0e70, name=0x56389759b141 "realized", -value=0x5638996df900, errp=0x7ffd3c2b84e0) -      at ../qom/qom-qobject.c:28 -#8  0x00005638970f3d8d in object_property_set_bool -(obj=0x5638996e0e70, name=0x56389759b141 "realized", value=true, -errp=0x7ffd3c2b84e0) -      at ../qom/object.c:1519 -#9  0x00005638970eacb0 in qdev_realize (dev=0x5638996e0e70, -bus=0x563898cf3c20, errp=0x7ffd3c2b84e0) at ../hw/core/qdev.c:276 -#10 0x0000563896dba675 in qdev_device_add_from_qdict -(opts=0x5638996dfe50, from_json=false, errp=0x7ffd3c2b84e0) at ../ -system/qdev-monitor.c:714 -#11 0x0000563896dba721 in qdev_device_add (opts=0x563898786150, -errp=0x56389855dc40 ) at ../system/qdev-monitor.c:733 -#12 0x0000563896dc48f1 in device_init_func (opaque=0x0, -opts=0x563898786150, errp=0x56389855dc40 ) at ../system/ -vl.c:1207 -#13 0x000056389737a6cc in qemu_opts_foreach -      (list=0x563898427b60 , func=0x563896dc48ca -, opaque=0x0, errp=0x56389855dc40 ) -      at ../util/qemu-option.c:1135 -#14 0x0000563896dc89b5 in qemu_create_cli_devices () at ../system/ -vl.c:2745 -#15 0x0000563896dc8c00 in qmp_x_exit_preconfig (errp=0x56389855dc40 -) at ../system/vl.c:2806 -#16 0x0000563896dcb5de in qemu_init (argc=33, argv=0x7ffd3c2b8948) -at ../system/vl.c:3838 -#17 0x0000563897297323 in main (argc=33, argv=0x7ffd3c2b8948) at ../ -system/main.c:72 -So the attached adjusted version of your patch does seem to help.  At -least I can't reproduce the crash on my stand. -Thanks for the stack trace; the calls to SPICE_RING_INIT in init_qxl_ram -are -definitely harmful.  Try V2 of the patch, attached, which skips the lines -of init_qxl_ram that modify guest memory. -Thanks, your v2 patch does seem to prevent the crash.  Would you re-send -it to the list as a proper fix? -Yes.  Was waiting for your confirmation. -I'm wondering, could it be useful to explicitly mark all the reused -memory regions readonly upon cpr-transfer, and then make them writable -back again after the migration is done?  That way we will be segfaulting -early on instead of debugging tricky memory corruptions. -It's a useful debugging technique, but changing protection on a large -memory region -can be too expensive for production due to TLB shootdowns. 
- -Also, there are cases where writes are performed but the value is -guaranteed to -be the same: -   qxl_post_load() -     qxl_set_mode() -       d->rom->mode = cpu_to_le32(modenr); -The value is the same because mode and shadow_rom.mode were passed in -vmstate -from old qemu. -There're also cases where devices' ROM might be re-initialized.  E.g. -this segfault occures upon further exploration of RO mapped RAM blocks: -Program terminated with signal SIGSEGV, Segmentation fault. -#0  __memmove_avx_unaligned_erms () at -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -664             rep     movsb -[Current thread is 1 (Thread 0x7f6e7d08b480 (LWP 310379))] -(gdb) bt -#0  __memmove_avx_unaligned_erms () at -../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:664 -#1  0x000055aa1d030ecd in rom_set_mr (rom=0x55aa200ba380, owner=0x55aa2019ac10, -name=0x7fffb8272bc0 "/rom@etc/acpi/tables", ro=true) -     at ../hw/core/loader.c:1032 -#2  0x000055aa1d031577 in rom_add_blob -     (name=0x55aa1da51f13 "etc/acpi/tables", blob=0x55aa208a1070, len=131072, max_len=2097152, -addr=18446744073709551615, fw_file_name=0x55aa1da51f13 "etc/acpi/tables", -fw_callback=0x55aa1d441f59 , callback_opaque=0x55aa20ff0010, as=0x0, -read_only=true) at ../hw/core/loader.c:1147 -#3  0x000055aa1cfd788d in acpi_add_rom_blob -     (update=0x55aa1d441f59 , opaque=0x55aa20ff0010, -blob=0x55aa1fc9aa00, name=0x55aa1da51f13 "etc/acpi/tables") at ../hw/acpi/utils.c:46 -#4  0x000055aa1d44213f in acpi_setup () at ../hw/i386/acpi-build.c:2720 -#5  0x000055aa1d434199 in pc_machine_done (notifier=0x55aa1ff15050, data=0x0) -at ../hw/i386/pc.c:638 -#6  0x000055aa1d876845 in notifier_list_notify (list=0x55aa1ea25c10 -, data=0x0) at ../util/notify.c:39 -#7  0x000055aa1d039ee5 in qdev_machine_creation_done () at -../hw/core/machine.c:1749 -#8  0x000055aa1d2c7b3e in qemu_machine_creation_done (errp=0x55aa1ea5cc40 -) at ../system/vl.c:2779 -#9  0x000055aa1d2c7c7d in qmp_x_exit_preconfig (errp=0x55aa1ea5cc40 -) at ../system/vl.c:2807 -#10 0x000055aa1d2ca64f in qemu_init (argc=35, argv=0x7fffb82730e8) at -../system/vl.c:3838 -#11 0x000055aa1d79638c in main (argc=35, argv=0x7fffb82730e8) at -../system/main.c:72 -I'm not sure whether ACPI tables ROM in particular is rewritten with the -same content, but there might be cases where ROM can be read from file -system upon initialization.  That is undesirable as guest kernel -certainly won't be too happy about sudden change of the device's ROM -content. - -So the issue we're dealing with here is any unwanted memory related -device initialization upon cpr. - -For now the only thing that comes to my mind is to make a test where we -put as many devices as we can into a VM, make ram blocks RO upon cpr -(and remap them as RW later after migration is done, if needed), and -catch any unwanted memory violations.  As Den suggested, we might -consider adding that behaviour as a separate non-default option (or -"migrate" command flag specific to cpr-transfer), which would only be -used in the testing. -I'll look into adding an option, but there may be too many false positives, -such as the qxl_set_mode case above.  And the maintainers may object to me -eliminating the false positives by adding more CPR_IN tests, due to gratuitous -(from their POV) ugliness. - -But I will use the technique to look for more write violations. -Andrey -No way. ACPI with the source must be used in the same way as BIOSes -and optional ROMs. -Yup, its a bug.  Will fix. 
- -- Steve - diff --git a/classification_output/01/mistranslation/6178292 b/classification_output/01/mistranslation/6178292 deleted file mode 100644 index f13db3b86..000000000 --- a/classification_output/01/mistranslation/6178292 +++ /dev/null @@ -1,258 +0,0 @@ -mistranslation: 0.930 -semantic: 0.928 -instruction: 0.905 -other: 0.890 - -[BUG][RFC] CPR transfer Issues: Socket permissions and PID files - -Hello, - -While testing CPR transfer I encountered two issues. The first is that the -transfer fails when running with pidfiles due to the destination qemu process -attempting to create the pidfile while it is still locked by the source -process. The second is that the transfer fails when running with the -run-with -user=$USERID parameter. This is because the destination qemu process creates -the UNIX sockets used for the CPR transfer before dropping to the lower -permissioned user, which causes them to be owned by the original user. The -source qemu process then does not have permission to connect to it because it -is already running as the lesser permissioned user. - -Reproducing the first issue: - -Create a source and destination qemu instance associated with the same VM where -both processes have the -pidfile parameter passed on the command line. You -should see the following error on the command line of the second process: - -qemu-system-x86_64: cannot create PID file: Cannot lock pid file: Resource -temporarily unavailable - -Reproducing the second issue: - -Create a source and destination qemu instance associated with the same VM where -both processes have -run-with user=$USERID passed on the command line, where -$USERID is a different user from the one launching the processes. Then attempt -a CPR transfer using UNIX sockets for the main and cpr sockets. You should -receive the following error via QMP: -{"error": {"class": "GenericError", "desc": "Failed to connect to 'cpr.sock': -Permission denied"}} - -I provided a minimal patch that works around the second issue. - -Thank you, -Ben Chaney - ---- -include/system/os-posix.h | 4 ++++ -os-posix.c | 8 -------- -util/qemu-sockets.c | 21 +++++++++++++++++++++ -3 files changed, 25 insertions(+), 8 deletions(-) - -diff --git a/include/system/os-posix.h b/include/system/os-posix.h -index ce5b3bccf8..2a414a914a 100644 ---- a/include/system/os-posix.h -+++ b/include/system/os-posix.h -@@ -55,6 +55,10 @@ void os_setup_limits(void); -void os_setup_post(void); -int os_mlock(bool on_fault); - -+extern struct passwd *user_pwd; -+extern uid_t user_uid; -+extern gid_t user_gid; -+ -/** -* qemu_alloc_stack: -* @sz: pointer to a size_t holding the requested usable stack size -diff --git a/os-posix.c b/os-posix.c -index 52925c23d3..9369b312a0 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -86,14 +86,6 @@ void os_set_proc_name(const char *s) -} - - --/* -- * Must set all three of these at once. -- * Legal combinations are unset by name by uid -- */ --static struct passwd *user_pwd; /* NULL non-NULL NULL */ --static uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ --static gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ -- -/* -* Prepare to change user ID. user_id can be one of 3 forms: -* - a username, in which case user ID will be changed to its uid, -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 77477c1cd5..987977ead9 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -871,6 +871,14 @@ static bool saddr_is_tight(UnixSocketAddress *saddr) -#endif -} - -+/* -+ * Must set all three of these at once. 
-+ * Legal combinations are unset by name by uid -+ */ -+struct passwd *user_pwd; /* NULL non-NULL NULL */ -+uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ -+gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ -+ -static int unix_listen_saddr(UnixSocketAddress *saddr, -int num, -Error **errp) -@@ -947,6 +955,19 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, -error_setg_errno(errp, errno, "Failed to bind socket to %s", path); -goto err; -} -+ if (user_pwd) { -+ if (chown(un.sun_path, user_pwd->pw_uid, user_pwd->pw_gid) < 0) { -+ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", -path); -+ goto err; -+ } -+ } -+ else if (user_uid != -1 && user_gid != -1) { -+ if (chown(un.sun_path, user_uid, user_gid) < 0) { -+ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", -path); -+ goto err; -+ } -+ } -+ -if (listen(sock, num) < 0) { -error_setg_errno(errp, errno, "Failed to listen on socket"); -goto err; --- -2.40.1 - -Thank you Ben. I appreciate you testing CPR and shaking out the bugs. -I will study these and propose patches. - -My initial reaction to the pidfile issue is that the orchestration layer must -pass a different filename when starting the destination qemu instance. When -using live update without containers, these types of resource conflicts in the -global namespaces are a known issue. - -- Steve - -On 3/14/2025 2:33 PM, Chaney, Ben wrote: -Hello, - -While testing CPR transfer I encountered two issues. The first is that the -transfer fails when running with pidfiles due to the destination qemu process -attempting to create the pidfile while it is still locked by the source -process. The second is that the transfer fails when running with the -run-with -user=$USERID parameter. This is because the destination qemu process creates -the UNIX sockets used for the CPR transfer before dropping to the lower -permissioned user, which causes them to be owned by the original user. The -source qemu process then does not have permission to connect to it because it -is already running as the lesser permissioned user. - -Reproducing the first issue: - -Create a source and destination qemu instance associated with the same VM where -both processes have the -pidfile parameter passed on the command line. You -should see the following error on the command line of the second process: - -qemu-system-x86_64: cannot create PID file: Cannot lock pid file: Resource -temporarily unavailable - -Reproducing the second issue: - -Create a source and destination qemu instance associated with the same VM where -both processes have -run-with user=$USERID passed on the command line, where -$USERID is a different user from the one launching the processes. Then attempt -a CPR transfer using UNIX sockets for the main and cpr sockets. You should -receive the following error via QMP: -{"error": {"class": "GenericError", "desc": "Failed to connect to 'cpr.sock': -Permission denied"}} - -I provided a minimal patch that works around the second issue. 
- -Thank you, -Ben Chaney - ---- -include/system/os-posix.h | 4 ++++ -os-posix.c | 8 -------- -util/qemu-sockets.c | 21 +++++++++++++++++++++ -3 files changed, 25 insertions(+), 8 deletions(-) - -diff --git a/include/system/os-posix.h b/include/system/os-posix.h -index ce5b3bccf8..2a414a914a 100644 ---- a/include/system/os-posix.h -+++ b/include/system/os-posix.h -@@ -55,6 +55,10 @@ void os_setup_limits(void); -void os_setup_post(void); -int os_mlock(bool on_fault); - -+extern struct passwd *user_pwd; -+extern uid_t user_uid; -+extern gid_t user_gid; -+ -/** -* qemu_alloc_stack: -* @sz: pointer to a size_t holding the requested usable stack size -diff --git a/os-posix.c b/os-posix.c -index 52925c23d3..9369b312a0 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -86,14 +86,6 @@ void os_set_proc_name(const char *s) -} - - --/* -- * Must set all three of these at once. -- * Legal combinations are unset by name by uid -- */ --static struct passwd *user_pwd; /* NULL non-NULL NULL */ --static uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ --static gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ -- -/* -* Prepare to change user ID. user_id can be one of 3 forms: -* - a username, in which case user ID will be changed to its uid, -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 77477c1cd5..987977ead9 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -871,6 +871,14 @@ static bool saddr_is_tight(UnixSocketAddress *saddr) -#endif -} - -+/* -+ * Must set all three of these at once. -+ * Legal combinations are unset by name by uid -+ */ -+struct passwd *user_pwd; /* NULL non-NULL NULL */ -+uid_t user_uid = (uid_t)-1; /* -1 -1 >=0 */ -+gid_t user_gid = (gid_t)-1; /* -1 -1 >=0 */ -+ -static int unix_listen_saddr(UnixSocketAddress *saddr, -int num, -Error **errp) -@@ -947,6 +955,19 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, -error_setg_errno(errp, errno, "Failed to bind socket to %s", path); -goto err; -} -+ if (user_pwd) { -+ if (chown(un.sun_path, user_pwd->pw_uid, user_pwd->pw_gid) < 0) { -+ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", -path); -+ goto err; -+ } -+ } -+ else if (user_uid != -1 && user_gid != -1) { -+ if (chown(un.sun_path, user_uid, user_gid) < 0) { -+ error_setg_errno(errp, errno, "Failed to change permissions on socket %s", -path); -+ goto err; -+ } -+ } -+ -if (listen(sock, num) < 0) { -error_setg_errno(errp, errno, "Failed to listen on socket"); -goto err; --- -2.40.1 - diff --git a/classification_output/01/mistranslation/64322995 b/classification_output/01/mistranslation/64322995 new file mode 100644 index 000000000..2f16ce872 --- /dev/null +++ b/classification_output/01/mistranslation/64322995 @@ -0,0 +1,54 @@ +mistranslation: 0.936 +semantic: 0.906 +other: 0.881 +instruction: 0.864 + +[Qemu-devel] [BUG] trace: QEMU hangs on initialization with the "simple" backend + +While starting the softmmu version of QEMU, the simple backend waits for the +writeout thread to signal a condition variable when initializing the output file +path. But since the writeout thread has not been created, it just waits forever. + +Thanks, + Lluis + +On Tue, Feb 09, 2016 at 09:24:04PM +0100, Lluís Vilanova wrote: +> +While starting the softmmu version of QEMU, the simple backend waits for the +> +writeout thread to signal a condition variable when initializing the output +> +file +> +path. But since the writeout thread has not been created, it just waits +> +forever. 
+Denis Lunev posted a fix: +https://patchwork.ozlabs.org/patch/580968/ +Stefan +signature.asc +Description: +PGP signature + +Stefan Hajnoczi writes: + +> +On Tue, Feb 09, 2016 at 09:24:04PM +0100, Lluís Vilanova wrote: +> +> While starting the softmmu version of QEMU, the simple backend waits for the +> +> writeout thread to signal a condition variable when initializing the output +> +> file +> +> path. But since the writeout thread has not been created, it just waits +> +> forever. +> +Denis Lunev posted a fix: +> +https://patchwork.ozlabs.org/patch/580968/ +Great, thanks. + +Lluis + diff --git a/classification_output/01/mistranslation/6866700 b/classification_output/01/mistranslation/6866700 deleted file mode 100644 index 2f16ce872..000000000 --- a/classification_output/01/mistranslation/6866700 +++ /dev/null @@ -1,54 +0,0 @@ -mistranslation: 0.936 -semantic: 0.906 -other: 0.881 -instruction: 0.864 - -[Qemu-devel] [BUG] trace: QEMU hangs on initialization with the "simple" backend - -While starting the softmmu version of QEMU, the simple backend waits for the -writeout thread to signal a condition variable when initializing the output file -path. But since the writeout thread has not been created, it just waits forever. - -Thanks, - Lluis - -On Tue, Feb 09, 2016 at 09:24:04PM +0100, Lluís Vilanova wrote: -> -While starting the softmmu version of QEMU, the simple backend waits for the -> -writeout thread to signal a condition variable when initializing the output -> -file -> -path. But since the writeout thread has not been created, it just waits -> -forever. -Denis Lunev posted a fix: -https://patchwork.ozlabs.org/patch/580968/ -Stefan -signature.asc -Description: -PGP signature - -Stefan Hajnoczi writes: - -> -On Tue, Feb 09, 2016 at 09:24:04PM +0100, Lluís Vilanova wrote: -> -> While starting the softmmu version of QEMU, the simple backend waits for the -> -> writeout thread to signal a condition variable when initializing the output -> -> file -> -> path. But since the writeout thread has not been created, it just waits -> -> forever. -> -Denis Lunev posted a fix: -> -https://patchwork.ozlabs.org/patch/580968/ -Great, thanks. - -Lluis - diff --git a/classification_output/01/mistranslation/70294255 b/classification_output/01/mistranslation/70294255 new file mode 100644 index 000000000..67353acda --- /dev/null +++ b/classification_output/01/mistranslation/70294255 @@ -0,0 +1,1061 @@ +mistranslation: 0.862 +semantic: 0.858 +instruction: 0.856 +other: 0.852 + +[Qemu-devel] 答复: Re: 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang + +hi: + +yes.it is better. + +And should we delete + + + + +#ifdef WIN32 + + QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL) + +#endif + + + + +in qio_channel_socket_accept? + +qio_channel_socket_new already have it. 
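To make the failure mode in this COLO thread concrete: the hang happens because the incoming thread is blocked in recvmsg() on the accepted migration fd, and qemu_file_shutdown() has no effect unless the channel was created with QIO_CHANNEL_FEATURE_SHUTDOWN set, which qio_channel_socket_new() does (and which is also why allocating through it in qio_channel_socket_accept() makes both the explicit set_feature call and the duplicated WIN32 event creation unnecessary, as agreed above). The following standalone POSIX sketch, unrelated to QEMU's own code, shows why shutdown(2) is the right tool: it forces a recv() blocked in another thread to return, which a plain close() from the failover path cannot be relied on to do.

    #include <pthread.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Stand-in for colo_process_incoming_thread(): blocks in recv(). */
    static void *reader(void *arg)
    {
        int fd = *(int *)arg;
        char buf[16];
        ssize_t n = recv(fd, buf, sizeof(buf), 0);
        printf("recv returned %zd\n", n);  /* 0 once the socket is shut down */
        return NULL;
    }

    int main(void)
    {
        int fds[2];
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
            perror("socketpair");
            return 1;
        }

        pthread_t tid;
        pthread_create(&tid, NULL, reader, &fds[0]);
        sleep(1);  /* let the reader block in recv() */

        /* shutdown() wakes the blocked recv() immediately; this is what
         * qemu_file_shutdown() relies on, hence the need for the channel
         * to advertise QIO_CHANNEL_FEATURE_SHUTDOWN. */
        shutdown(fds[0], SHUT_RDWR);

        pthread_join(tid, NULL);
        close(fds[0]);
        close(fds[1]);
        return 0;
    }

Build with -pthread; on failover the same wake-up is what lets the COLO incoming thread leave recvmsg() and proceed instead of hanging.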
+ + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人:王广10165992 +抄送人: address@hidden address@hidden address@hidden address@hidden +日 期 :2017å¹´03月22日 15:03 +主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang + + + + + +Hi, + +On 2017/3/22 9:42, address@hidden wrote: +> diff --git a/migration/socket.c b/migration/socket.c +> +> +> index 13966f1..d65a0ea 100644 +> +> +> --- a/migration/socket.c +> +> +> +++ b/migration/socket.c +> +> +> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +> +> +> } +> +> +> +> +> +> trace_migration_socket_incoming_accepted() +> +> +> +> +> +> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +> +> +> + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) +> +> +> migration_channel_process_incoming(migrate_get_current(), +> +> +> QIO_CHANNEL(sioc)) +> +> +> object_unref(OBJECT(sioc)) +> +> +> +> +> Is this patch ok? +> + +Yes, i think this works, but a better way maybe to call +qio_channel_set_feature() +in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the +socket accept fd, +Or fix it by this: + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index f546c68..ce6894c 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp) + { + QIOChannelSocket *cioc +- +- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)) +- cioc->fd = -1 ++ ++ cioc = qio_channel_socket_new() + cioc->remoteAddrLen = sizeof(ioc->remoteAddr) + cioc->localAddrLen = sizeof(ioc->localAddr) + + +Thanks, +Hailiang + +> I have test it . The test could not hang any more. +> +> +> +> +> +> +> +> +> +> +> +> +> 原始邮件 +> +> +> +> 发件人: address@hidden +> 收件人: address@hidden address@hidden +> 抄送人: address@hidden address@hidden address@hidden +> 日 期 :2017å¹´03月22日 09:11 +> 主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang +> +> +> +> +> +> On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +> > * Hailiang Zhang (address@hidden) wrote: +> >> Hi, +> >> +> >> Thanks for reporting this, and i confirmed it in my test, and it is a bug. +> >> +> >> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +> >> case COLO thread/incoming thread is stuck in read/write() while do +failover, +> >> but it didn't take effect, because all the fd used by COLO (also migration) +> >> has been wrapped by qio channel, and it will not call the shutdown API if +> >> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +> >> +> >> Cc: Dr. David Alan Gilbert address@hidden +> >> +> >> I doubted migration cancel has the same problem, it may be stuck in write() +> >> if we tried to cancel migration. +> >> +> >> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +> >> { +> >> qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +> >> migration_channel_connect(s, ioc, NULL) +> >> ... ... +> >> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +> >> and the +> >> migrate_fd_cancel() +> >> { +> >> ... ... +> >> if (s->state == MIGRATION_STATUS_CANCELLING && f) { +> >> qemu_file_shutdown(f) --> This will not take effect. No ? +> >> } +> >> } +> > +> > (cc'd in Daniel Berrange). +> > I see that we call qio_channel_set_feature(ioc, +QIO_CHANNEL_FEATURE_SHUTDOWN) at the +> > top of qio_channel_socket_new so I think that's safe isn't it? 
+> > +> +> Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. +> +> > Dave +> > +> >> Thanks, +> >> Hailiang +> >> +> >> On 2017/3/21 16:10, address@hidden wrote: +> >>> Thank you。 +> >>> +> >>> I have test aready。 +> >>> +> >>> When the Primary Node panic,the Secondary Node qemu hang at the same +place。 +> >>> +> >>> Incorrding +http://wiki.qemu-project.org/Features/COLO +,kill Primary Node +qemu will not produce the problem,but Primary Node panic can。 +> >>> +> >>> I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +> >>> +> >>> +> >>> when failover,channel_shutdown could not shut down the channel. +> >>> +> >>> +> >>> so the colo_process_incoming_thread will hang at recvmsg. +> >>> +> >>> +> >>> I test a patch: +> >>> +> >>> +> >>> diff --git a/migration/socket.c b/migration/socket.c +> >>> +> >>> +> >>> index 13966f1..d65a0ea 100644 +> >>> +> >>> +> >>> --- a/migration/socket.c +> >>> +> >>> +> >>> +++ b/migration/socket.c +> >>> +> >>> +> >>> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +> >>> +> >>> +> >>> } +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> trace_migration_socket_incoming_accepted() +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> qio_channel_set_name(QIO_CHANNEL(sioc), +"migration-socket-incoming") +> >>> +> >>> +> >>> + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +> >>> +> >>> +> >>> migration_channel_process_incoming(migrate_get_current(), +> >>> +> >>> +> >>> QIO_CHANNEL(sioc)) +> >>> +> >>> +> >>> object_unref(OBJECT(sioc)) +> >>> +> >>> +> >>> +> >>> +> >>> My test will not hang any more. +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> 原始邮件 +> >>> +> >>> +> >>> +> >>> 发件人: address@hidden +> >>> 收件人:王广10165992 address@hidden +> >>> 抄送人: address@hidden address@hidden +> >>> 日 期 :2017å¹´03月21日 15:58 +> >>> 主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> Hi,Wang. +> >>> +> >>> You can test this branch: +> >>> +> >>> +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +> >>> +> >>> and please follow wiki ensure your own configuration correctly. +> >>> +> >>> +http://wiki.qemu-project.org/Features/COLO +> >>> +> >>> +> >>> Thanks +> >>> +> >>> Zhang Chen +> >>> +> >>> +> >>> On 03/21/2017 03:27 PM, address@hidden wrote: +> >>> > +> >>> > hi. +> >>> > +> >>> > I test the git qemu master have the same problem. 
+> >>> > +> >>> > (gdb) bt +> >>> > +> >>> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +> >>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +> >>> > +> >>> > #1 0x00007f658e4aa0c2 in qio_channel_read +> >>> > (address@hidden, address@hidden "", +> >>> > address@hidden, address@hidden) at io/channel.c:114 +> >>> > +> >>> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +> >>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +> >>> > migration/qemu-file-channel.c:78 +> >>> > +> >>> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +> >>> > migration/qemu-file.c:295 +> >>> > +> >>> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +> >>> > address@hidden) at migration/qemu-file.c:555 +> >>> > +> >>> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +> >>> > migration/qemu-file.c:568 +> >>> > +> >>> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +> >>> > migration/qemu-file.c:648 +> >>> > +> >>> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +> >>> > address@hidden) at migration/colo.c:244 +> >>> > +> >>> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized +> >>> > out>, address@hidden, +> >>> > address@hidden) +> >>> > +> >>> > at migration/colo.c:264 +> >>> > +> >>> > #9 0x00007f658e3e740e in colo_process_incoming_thread +> >>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 +> >>> > +> >>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +> >>> > +> >>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +> >>> > +> >>> > (gdb) p ioc->name +> >>> > +> >>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +> >>> > +> >>> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +> >>> > +> >>> > $3 = 0 +> >>> > +> >>> > +> >>> > (gdb) bt +> >>> > +> >>> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +> >>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +> >>> > +> >>> > #1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at +> >>> > gmain.c:3054 +> >>> > +> >>> > #2 g_main_context_dispatch (context=<optimized out>, +> >>> > address@hidden) at gmain.c:3630 +> >>> > +> >>> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +> >>> > +> >>> > #4 os_host_main_loop_wait (timeout=<optimized out>) at +> >>> > util/main-loop.c:258 +> >>> > +> >>> > #5 main_loop_wait (address@hidden) at +> >>> > util/main-loop.c:506 +> >>> > +> >>> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +> >>> > +> >>> > #7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized +> >>> > out>) at vl.c:4709 +> >>> > +> >>> > (gdb) p ioc->features +> >>> > +> >>> > $1 = 6 +> >>> > +> >>> > (gdb) p ioc->name +> >>> > +> >>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +> >>> > +> >>> > +> >>> > May be socket_accept_incoming_migration should +> >>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +> >>> > +> >>> > +> >>> > thank you. +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > 原始邮件 +> >>> > address@hidden +> >>> > address@hidden +> >>> > address@hidden@huawei.com> +> >>> > *日 期 :*2017å¹´03月16日 14:46 +> >>> > *主 题 :**Re: [Qemu-devel] COLO failover hang* +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > On 03/15/2017 05:06 PM, wangguang wrote: +> >>> > > am testing QEMU COLO feature described here [QEMU +> >>> > > Wiki]( +http://wiki.qemu-project.org/Features/COLO +). 
+> >>> > > +> >>> > > When the Primary Node panic,the Secondary Node qemu hang. +> >>> > > hang at recvmsg in qio_channel_socket_readv. +> >>> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": +> >>> > > "x-colo-lost-heartbeat" } in Secondary VM's +> >>> > > monitor,the Secondary Node qemu still hang at recvmsg . +> >>> > > +> >>> > > I found that the colo in qemu is not complete yet. +> >>> > > Do the colo have any plan for development? +> >>> > +> >>> > Yes, We are developing. You can see some of patch we pushing. +> >>> > +> >>> > > Has anyone ever run it successfully? Any help is appreciated! +> >>> > +> >>> > In our internal version can run it successfully, +> >>> > The failover detail you can ask Zhanghailiang for help. +> >>> > Next time if you have some question about COLO, +> >>> > please cc me and zhanghailiang address@hidden +> >>> > +> >>> > +> >>> > Thanks +> >>> > Zhang Chen +> >>> > +> >>> > +> >>> > > +> >>> > > +> >>> > > +> >>> > > centos7.2+qemu2.7.50 +> >>> > > (gdb) bt +> >>> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +> >>> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized +out>, +> >>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, +errp=0x0) at +> >>> > > io/channel-socket.c:497 +> >>> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +> >>> > > address@hidden "", address@hidden, +> >>> > > address@hidden) at io/channel.c:97 +> >>> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +> >>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +> >>> > > migration/qemu-file-channel.c:78 +> >>> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +> >>> > > migration/qemu-file.c:257 +> >>> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +> >>> > > address@hidden) at migration/qemu-file.c:510 +> >>> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +> >>> > > migration/qemu-file.c:523 +> >>> > > #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +> >>> > > migration/qemu-file.c:603 +> >>> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +> >>> > > address@hidden) at migration/colo.c:215 +> >>> > > #9 0x00007f3e0327250d in colo_wait_handle_message +(errp=0x7f3d62bfaa48, +> >>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at +> >>> > > migration/colo.c:546 +> >>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +> >>> > > migration/colo.c:649 +> >>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +> >>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +> >>> > > +> >>> > > +> >>> > > +> >>> > > +> >>> > > +> >>> > > -- +> >>> > > View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +> >>> > > Sent from the Developer mailing list archive at Nabble.com. +> >>> > > +> >>> > > +> >>> > > +> >>> > > +> >>> > +> >>> > -- +> >>> > Thanks +> >>> > Zhang Chen +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > +> >>> +> >> +> > -- +> > Dr. David Alan Gilbert / address@hidden / Manchester, UK +> > +> > . +> > +> + +On 2017/3/22 16:09, address@hidden wrote: +hi: + +yes.it is better. + +And should we delete +Yes, you are right. +#ifdef WIN32 + + QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL) + +#endif + + + + +in qio_channel_socket_accept? + +qio_channel_socket_new already have it. 
+ + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人:王广10165992 +抄送人: address@hidden address@hidden address@hidden address@hidden +日 期 :2017å¹´03月22日 15:03 +主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang + + + + + +Hi, + +On 2017/3/22 9:42, address@hidden wrote: +> diff --git a/migration/socket.c b/migration/socket.c +> +> +> index 13966f1..d65a0ea 100644 +> +> +> --- a/migration/socket.c +> +> +> +++ b/migration/socket.c +> +> +> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +> +> +> } +> +> +> +> +> +> trace_migration_socket_incoming_accepted() +> +> +> +> +> +> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +> +> +> + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) +> +> +> migration_channel_process_incoming(migrate_get_current(), +> +> +> QIO_CHANNEL(sioc)) +> +> +> object_unref(OBJECT(sioc)) +> +> +> +> +> Is this patch ok? +> + +Yes, i think this works, but a better way maybe to call +qio_channel_set_feature() +in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the +socket accept fd, +Or fix it by this: + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index f546c68..ce6894c 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp) + { + QIOChannelSocket *cioc +- +- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)) +- cioc->fd = -1 ++ ++ cioc = qio_channel_socket_new() + cioc->remoteAddrLen = sizeof(ioc->remoteAddr) + cioc->localAddrLen = sizeof(ioc->localAddr) + + +Thanks, +Hailiang + +> I have test it . The test could not hang any more. +> +> +> +> +> +> +> +> +> +> +> +> +> 原始邮件 +> +> +> +> 发件人: address@hidden +> 收件人: address@hidden address@hidden +> 抄送人: address@hidden address@hidden address@hidden +> 日 期 :2017å¹´03月22日 09:11 +> 主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang +> +> +> +> +> +> On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +> > * Hailiang Zhang (address@hidden) wrote: +> >> Hi, +> >> +> >> Thanks for reporting this, and i confirmed it in my test, and it is a bug. +> >> +> >> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +> >> case COLO thread/incoming thread is stuck in read/write() while do +failover, +> >> but it didn't take effect, because all the fd used by COLO (also migration) +> >> has been wrapped by qio channel, and it will not call the shutdown API if +> >> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +> >> +> >> Cc: Dr. David Alan Gilbert address@hidden +> >> +> >> I doubted migration cancel has the same problem, it may be stuck in write() +> >> if we tried to cancel migration. +> >> +> >> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +> >> { +> >> qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +> >> migration_channel_connect(s, ioc, NULL) +> >> ... ... +> >> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +> >> and the +> >> migrate_fd_cancel() +> >> { +> >> ... ... +> >> if (s->state == MIGRATION_STATUS_CANCELLING && f) { +> >> qemu_file_shutdown(f) --> This will not take effect. No ? +> >> } +> >> } +> > +> > (cc'd in Daniel Berrange). +> > I see that we call qio_channel_set_feature(ioc, +QIO_CHANNEL_FEATURE_SHUTDOWN) at the +> > top of qio_channel_socket_new so I think that's safe isn't it? 
+> > +> +> Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. +> +> > Dave +> > +> >> Thanks, +> >> Hailiang +> >> +> >> On 2017/3/21 16:10, address@hidden wrote: +> >>> Thank you。 +> >>> +> >>> I have test aready。 +> >>> +> >>> When the Primary Node panic,the Secondary Node qemu hang at the same +place。 +> >>> +> >>> Incorrding +http://wiki.qemu-project.org/Features/COLO +,kill Primary Node +qemu will not produce the problem,but Primary Node panic can。 +> >>> +> >>> I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +> >>> +> >>> +> >>> when failover,channel_shutdown could not shut down the channel. +> >>> +> >>> +> >>> so the colo_process_incoming_thread will hang at recvmsg. +> >>> +> >>> +> >>> I test a patch: +> >>> +> >>> +> >>> diff --git a/migration/socket.c b/migration/socket.c +> >>> +> >>> +> >>> index 13966f1..d65a0ea 100644 +> >>> +> >>> +> >>> --- a/migration/socket.c +> >>> +> >>> +> >>> +++ b/migration/socket.c +> >>> +> >>> +> >>> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +> >>> +> >>> +> >>> } +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> trace_migration_socket_incoming_accepted() +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> qio_channel_set_name(QIO_CHANNEL(sioc), +"migration-socket-incoming") +> >>> +> >>> +> >>> + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +> >>> +> >>> +> >>> migration_channel_process_incoming(migrate_get_current(), +> >>> +> >>> +> >>> QIO_CHANNEL(sioc)) +> >>> +> >>> +> >>> object_unref(OBJECT(sioc)) +> >>> +> >>> +> >>> +> >>> +> >>> My test will not hang any more. +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> 原始邮件 +> >>> +> >>> +> >>> +> >>> 发件人: address@hidden +> >>> 收件人:王广10165992 address@hidden +> >>> 抄送人: address@hidden address@hidden +> >>> 日 期 :2017å¹´03月21日 15:58 +> >>> 主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang +> >>> +> >>> +> >>> +> >>> +> >>> +> >>> Hi,Wang. +> >>> +> >>> You can test this branch: +> >>> +> >>> +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +> >>> +> >>> and please follow wiki ensure your own configuration correctly. +> >>> +> >>> +http://wiki.qemu-project.org/Features/COLO +> >>> +> >>> +> >>> Thanks +> >>> +> >>> Zhang Chen +> >>> +> >>> +> >>> On 03/21/2017 03:27 PM, address@hidden wrote: +> >>> > +> >>> > hi. +> >>> > +> >>> > I test the git qemu master have the same problem. 
+> >>> > +> >>> > (gdb) bt +> >>> > +> >>> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +> >>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +> >>> > +> >>> > #1 0x00007f658e4aa0c2 in qio_channel_read +> >>> > (address@hidden, address@hidden "", +> >>> > address@hidden, address@hidden) at io/channel.c:114 +> >>> > +> >>> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +> >>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +> >>> > migration/qemu-file-channel.c:78 +> >>> > +> >>> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +> >>> > migration/qemu-file.c:295 +> >>> > +> >>> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +> >>> > address@hidden) at migration/qemu-file.c:555 +> >>> > +> >>> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +> >>> > migration/qemu-file.c:568 +> >>> > +> >>> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +> >>> > migration/qemu-file.c:648 +> >>> > +> >>> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +> >>> > address@hidden) at migration/colo.c:244 +> >>> > +> >>> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized +> >>> > out>, address@hidden, +> >>> > address@hidden) +> >>> > +> >>> > at migration/colo.c:264 +> >>> > +> >>> > #9 0x00007f658e3e740e in colo_process_incoming_thread +> >>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 +> >>> > +> >>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +> >>> > +> >>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +> >>> > +> >>> > (gdb) p ioc->name +> >>> > +> >>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +> >>> > +> >>> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +> >>> > +> >>> > $3 = 0 +> >>> > +> >>> > +> >>> > (gdb) bt +> >>> > +> >>> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +> >>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +> >>> > +> >>> > #1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at +> >>> > gmain.c:3054 +> >>> > +> >>> > #2 g_main_context_dispatch (context=<optimized out>, +> >>> > address@hidden) at gmain.c:3630 +> >>> > +> >>> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +> >>> > +> >>> > #4 os_host_main_loop_wait (timeout=<optimized out>) at +> >>> > util/main-loop.c:258 +> >>> > +> >>> > #5 main_loop_wait (address@hidden) at +> >>> > util/main-loop.c:506 +> >>> > +> >>> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +> >>> > +> >>> > #7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized +> >>> > out>) at vl.c:4709 +> >>> > +> >>> > (gdb) p ioc->features +> >>> > +> >>> > $1 = 6 +> >>> > +> >>> > (gdb) p ioc->name +> >>> > +> >>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +> >>> > +> >>> > +> >>> > May be socket_accept_incoming_migration should +> >>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +> >>> > +> >>> > +> >>> > thank you. +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > 原始邮件 +> >>> > address@hidden +> >>> > address@hidden +> >>> > address@hidden@huawei.com> +> >>> > *日 期 :*2017å¹´03月16日 14:46 +> >>> > *主 题 :**Re: [Qemu-devel] COLO failover hang* +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > On 03/15/2017 05:06 PM, wangguang wrote: +> >>> > > am testing QEMU COLO feature described here [QEMU +> >>> > > Wiki]( +http://wiki.qemu-project.org/Features/COLO +). 
+> >>> > > +> >>> > > When the Primary Node panic,the Secondary Node qemu hang. +> >>> > > hang at recvmsg in qio_channel_socket_readv. +> >>> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": +> >>> > > "x-colo-lost-heartbeat" } in Secondary VM's +> >>> > > monitor,the Secondary Node qemu still hang at recvmsg . +> >>> > > +> >>> > > I found that the colo in qemu is not complete yet. +> >>> > > Do the colo have any plan for development? +> >>> > +> >>> > Yes, We are developing. You can see some of patch we pushing. +> >>> > +> >>> > > Has anyone ever run it successfully? Any help is appreciated! +> >>> > +> >>> > In our internal version can run it successfully, +> >>> > The failover detail you can ask Zhanghailiang for help. +> >>> > Next time if you have some question about COLO, +> >>> > please cc me and zhanghailiang address@hidden +> >>> > +> >>> > +> >>> > Thanks +> >>> > Zhang Chen +> >>> > +> >>> > +> >>> > > +> >>> > > +> >>> > > +> >>> > > centos7.2+qemu2.7.50 +> >>> > > (gdb) bt +> >>> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +> >>> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized +out>, +> >>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, +errp=0x0) at +> >>> > > io/channel-socket.c:497 +> >>> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +> >>> > > address@hidden "", address@hidden, +> >>> > > address@hidden) at io/channel.c:97 +> >>> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +> >>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +> >>> > > migration/qemu-file-channel.c:78 +> >>> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +> >>> > > migration/qemu-file.c:257 +> >>> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +> >>> > > address@hidden) at migration/qemu-file.c:510 +> >>> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +> >>> > > migration/qemu-file.c:523 +> >>> > > #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +> >>> > > migration/qemu-file.c:603 +> >>> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +> >>> > > address@hidden) at migration/colo.c:215 +> >>> > > #9 0x00007f3e0327250d in colo_wait_handle_message +(errp=0x7f3d62bfaa48, +> >>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at +> >>> > > migration/colo.c:546 +> >>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +> >>> > > migration/colo.c:649 +> >>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +> >>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +> >>> > > +> >>> > > +> >>> > > +> >>> > > +> >>> > > +> >>> > > -- +> >>> > > View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +> >>> > > Sent from the Developer mailing list archive at Nabble.com. +> >>> > > +> >>> > > +> >>> > > +> >>> > > +> >>> > +> >>> > -- +> >>> > Thanks +> >>> > Zhang Chen +> >>> > +> >>> > +> >>> > +> >>> > +> >>> > +> >>> +> >> +> > -- +> > Dr. David Alan Gilbert / address@hidden / Manchester, UK +> > +> > . 
+> > +> + diff --git a/classification_output/01/mistranslation/71456293 b/classification_output/01/mistranslation/71456293 new file mode 100644 index 000000000..746a624cc --- /dev/null +++ b/classification_output/01/mistranslation/71456293 @@ -0,0 +1,1486 @@ +mistranslation: 0.659 +instruction: 0.624 +semantic: 0.600 +other: 0.598 + +[Qemu-devel][bug] qemu crash when migrate vm and vm's disks + +When migrate vm and vm’s disks target host qemu crash due to an invalid free. +#0  object_unref (obj=0x1000) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 +#1  0x0000560434d79e79 in memory_region_unref (mr=) +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 +#2  flatview_destroy (view=0x560439653880) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 +#3  0x000056043514dfbe in call_rcu_thread (opaque=) +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 +#4  0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 +#5  0x00007fbc2b099bad in clone () from /lib64/libc.so.6 +test base qemu-2.12.0 +, +but use lastest qemu(v6.0.0-rc2) also reproduce. +As follow patch can resolve this problem: +https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html +Steps to reproduce: +(1) Create VM (virsh define) +(2) Add 64 virtio scsi disks +(3) migrate vm and vm’disks +------------------------------------------------------------------------------------------------------------------------------------- +本邮件及其附件含有新华三集团的保密信息,仅限于发送给上面地址中列出 +的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、 +或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本 +邮件! +This e-mail and its attachments contain confidential information from New H3C, which is +intended only for the person or entity whose address is listed above. Any use of the +information contained herein in any way (including, but not limited to, total or partial +disclosure, reproduction, or dissemination) by persons other than the intended +recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender +by phone or email immediately and delete it! + +* Yuchen (yu.chen@h3c.com) wrote: +> +When migrate vm and vm’s disks target host qemu crash due to an invalid free. +> +> +#0 object_unref (obj=0x1000) at +> +/qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 +> +#1 0x0000560434d79e79 in memory_region_unref (mr=) +> +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 +> +#2 flatview_destroy (view=0x560439653880) at +> +/qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 +> +#3 0x000056043514dfbe in call_rcu_thread (opaque=) +> +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 +> +#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 +> +#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 +> +> +test base qemu-2.12.0,but use lastest qemu(v6.0.0-rc2) also reproduce. +Interesting. + +> +As follow patch can resolve this problem: +> +https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html +That's a pci/rcu change; ccing Paolo and Micahel. + +> +Steps to reproduce: +> +(1) Create VM (virsh define) +> +(2) Add 64 virtio scsi disks +Is that hot adding the disks later, or are they included in the VM at +creation? +Can you provide a libvirt XML example? + +> +(3) migrate vm and vm’disks +What do you mean by 'and vm disks' - are you doing a block migration? 
+ +Dave + +> +------------------------------------------------------------------------------------------------------------------------------------- +> +本邮件及其附件含有新华三集团的保密信息,仅限于发送给上面地址中列出 +> +的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、 +> +或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本 +> +邮件! +> +This e-mail and its attachments contain confidential information from New +> +H3C, which is +> +intended only for the person or entity whose address is listed above. Any use +> +of the +> +information contained herein in any way (including, but not limited to, total +> +or partial +> +disclosure, reproduction, or dissemination) by persons other than the intended +> +recipient(s) is prohibited. If you receive this e-mail in error, please +> +notify the sender +> +by phone or email immediately and delete it! +-- +Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK + +> +-----邮件原件----- +> +发件人: Dr. David Alan Gilbert [ +mailto:dgilbert@redhat.com +] +> +发送时间: 2021å¹´4月8日 19:27 +> +收件人: yuchen (Cloud) ; pbonzini@redhat.com; +> +mst@redhat.com +> +抄送: qemu-devel@nongnu.org +> +主题: Re: [Qemu-devel][bug] qemu crash when migrate vm and vm's disks +> +> +* Yuchen (yu.chen@h3c.com) wrote: +> +> When migrate vm and vm’s disks target host qemu crash due to an invalid +> +free. +> +> +> +> #0 object_unref (obj=0x1000) at +> +> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 +> +> #1 0x0000560434d79e79 in memory_region_unref (mr=) +> +> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 +> +> #2 flatview_destroy (view=0x560439653880) at +> +> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 +> +> #3 0x000056043514dfbe in call_rcu_thread (opaque=) +> +> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 +> +> #4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 +> +> #5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 +> +> +> +> test base qemu-2.12.0,but use lastest qemu(v6.0.0-rc2) also reproduce. +> +> +Interesting. +> +> +> As follow patch can resolve this problem: +> +> +https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html +> +> +That's a pci/rcu change; ccing Paolo and Micahel. +> +> +> Steps to reproduce: +> +> (1) Create VM (virsh define) +> +> (2) Add 64 virtio scsi disks +> +> +Is that hot adding the disks later, or are they included in the VM at +> +creation? +> +Can you provide a libvirt XML example? +> +Include disks in the VM at creation + +vm disks xml (only virtio scsi disks): + + + + + +
+[The libvirt <disk>/<address> XML for the 64 virtio-scsi disks followed here,
+but the markup was stripped when the message was archived, leaving only blank
+lines.]
+
+vm disks xml (only virtio disks):
+
+[The corresponding virtio-blk <disk> XML was likewise stripped by the archive.]
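+
+For reference, each of the stripped entries above presumably looked something
+like the following libvirt <disk> element (a reconstructed example based on the
+"Add 64 virtio scsi disks" step, not text recovered from the original mail; the
+file name, controller and unit numbers are invented):
+
+    <disk type='file' device='disk'>
+      <driver name='qemu' type='qcow2'/>
+      <source file='/path/to/data-disk-01.qcow2'/>
+      <target dev='sdb' bus='scsi'/>
+      <address type='drive' controller='0' bus='0' target='0' unit='1'/>
+    </disk>
+
+The virtio-blk entries would differ mainly in using <target ... bus='virtio'/>
+and a PCI <address> element.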
+ + + +> +> (3) migrate vm and vm’disks +> +> +What do you mean by 'and vm disks' - are you doing a block migration? +> +Yes, block migration. +In fact, only migration domain also reproduced. + +> +Dave +> +> +> ---------------------------------------------------------------------- +> +> --------------------------------------------------------------- +> +Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK +------------------------------------------------------------------------------------------------------------------------------------- +本邮件及其附件含有新华三集团的保密信息,仅限于发送给上面地址中列出 +的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、 +或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本 +邮件! +This e-mail and its attachments contain confidential information from New H3C, +which is +intended only for the person or entity whose address is listed above. Any use +of the +information contained herein in any way (including, but not limited to, total +or partial +disclosure, reproduction, or dissemination) by persons other than the intended +recipient(s) is prohibited. If you receive this e-mail in error, please notify +the sender +by phone or email immediately and delete it! + diff --git a/classification_output/01/mistranslation/74466963 b/classification_output/01/mistranslation/74466963 new file mode 100644 index 000000000..fffafcf77 --- /dev/null +++ b/classification_output/01/mistranslation/74466963 @@ -0,0 +1,1878 @@ +mistranslation: 0.927 +instruction: 0.903 +semantic: 0.891 +other: 0.877 + +[Qemu-devel] [TCG only][Migration Bug? ] Occasionally, the content of VM's memory is inconsistent between Source and Destination of migration + +Hi all, + +Does anyboday remember the similar issue post by hailiang months ago +http://patchwork.ozlabs.org/patch/454322/ +At least tow bugs about migration had been fixed since that. +And now we found the same issue at the tcg vm(kvm is fine), after +migration, the content VM's memory is inconsistent. +we add a patch to check memory content, you can find it from affix + +steps to reporduce: +1) apply the patch and re-build qemu +2) prepare the ubuntu guest and run memtest in grub. +soruce side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off +destination side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 +3) start migration +with 1000M NIC, migration will finish within 3 min. + +at source: +(qemu) migrate tcp:192.168.2.66:8881 +after saving ram complete +e9e725df678d392b1a83b3a917f332bb +qemu-system-x86_64: end ram md5 +(qemu) + +at destination: +...skip... 
+Completed load of VM with exit code 0 seq iteration 1264 +Completed load of VM with exit code 0 seq iteration 1265 +Completed load of VM with exit code 0 seq iteration 1266 +qemu-system-x86_64: after loading state section id 2(ram) +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 +qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init + +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 + +This occurs occasionally and only at tcg machine. It seems that +some pages dirtied in source side don't transferred to destination. +This problem can be reproduced even if we disable virtio. +Is it OK for some pages that not transferred to destination when do +migration ? Or is it a bug? +Any idea... + +=================md5 check patch============================= + +diff --git a/Makefile.target b/Makefile.target +index 962d004..e2cb8e9 100644 +--- a/Makefile.target ++++ b/Makefile.target +@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o + obj-y += memory_mapping.o + obj-y += dump.o + obj-y += migration/ram.o migration/savevm.o +-LIBS := $(libs_softmmu) $(LIBS) ++LIBS := $(libs_softmmu) $(LIBS) -lplumb + + # xen support + obj-$(CONFIG_XEN) += xen-common.o +diff --git a/migration/ram.c b/migration/ram.c +index 1eb155a..3b7a09d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int +version_id) +} + + rcu_read_unlock(); +- DPRINTF("Completed load of VM with exit code %d seq iteration " ++ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); + return ret; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index 0ad1b93..3feaa61 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) + + } + ++#include "exec/ram_addr.h" ++#include "qemu/rcu_queue.h" ++#include ++#ifndef MD5_DIGEST_LENGTH ++#define MD5_DIGEST_LENGTH 16 ++#endif ++ ++static void check_host_md5(void) ++{ ++ int i; ++ unsigned char md[MD5_DIGEST_LENGTH]; ++ rcu_read_lock(); ++ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check +'pc.ram' block */ ++ rcu_read_unlock(); ++ ++ MD5(block->host, block->used_length, md); ++ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { ++ fprintf(stderr, "%02x", md[i]); ++ } ++ fprintf(stderr, "\n"); ++ error_report("end ram md5"); ++} ++ + void qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params) + { +@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile +*f, bool iterable_only) +save_section_header(f, se, QEMU_VM_SECTION_END); + + ret = se->ops->save_live_complete_precopy(f, se->opaque); ++ ++ fprintf(stderr, "after saving %s complete\n", se->idstr); ++ check_host_md5(); ++ + trace_savevm_section_end(se->idstr, se->section_id, ret); + save_section_footer(f, se); + if (ret < 0) { +@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, +MigrationIncomingState *mis) +section_id, le->se->idstr); + return ret; + } ++ if (section_type == QEMU_VM_SECTION_END) { ++ error_report("after loading state section id %d(%s)", ++ section_id, le->se->idstr); ++ check_host_md5(); ++ } + if (!check_section_footer(f, le)) { + return -EINVAL; + } +@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) + } + + cpu_synchronize_all_post_init(); ++ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); ++ check_host_md5(); + + return ret; + } + +* Li Zhijian (address@hidden) wrote: +> +Hi all, +> +> +Does anyboday 
remember the similar issue post by hailiang months ago +> +http://patchwork.ozlabs.org/patch/454322/ +> +At least tow bugs about migration had been fixed since that. +Yes, I wondered what happened to that. + +> +And now we found the same issue at the tcg vm(kvm is fine), after migration, +> +the content VM's memory is inconsistent. +Hmm, TCG only - I don't know much about that; but I guess something must +be accessing memory without using the proper macros/functions so +it doesn't mark it as dirty. + +> +we add a patch to check memory content, you can find it from affix +> +> +steps to reporduce: +> +1) apply the patch and re-build qemu +> +2) prepare the ubuntu guest and run memtest in grub. +> +soruce side: +> +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +> +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +> +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +> +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +> +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +> +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +> +pc-i440fx-2.3,accel=tcg,usb=off +> +> +destination side: +> +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +> +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +> +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +> +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +> +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +> +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +> +pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 +> +> +3) start migration +> +with 1000M NIC, migration will finish within 3 min. +> +> +at source: +> +(qemu) migrate tcp:192.168.2.66:8881 +> +after saving ram complete +> +e9e725df678d392b1a83b3a917f332bb +> +qemu-system-x86_64: end ram md5 +> +(qemu) +> +> +at destination: +> +...skip... +> +Completed load of VM with exit code 0 seq iteration 1264 +> +Completed load of VM with exit code 0 seq iteration 1265 +> +Completed load of VM with exit code 0 seq iteration 1266 +> +qemu-system-x86_64: after loading state section id 2(ram) +> +49c2dac7bde0e5e22db7280dcb3824f9 +> +qemu-system-x86_64: end ram md5 +> +qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init +> +> +49c2dac7bde0e5e22db7280dcb3824f9 +> +qemu-system-x86_64: end ram md5 +> +> +This occurs occasionally and only at tcg machine. It seems that +> +some pages dirtied in source side don't transferred to destination. +> +This problem can be reproduced even if we disable virtio. +> +> +Is it OK for some pages that not transferred to destination when do +> +migration ? Or is it a bug? +I'm pretty sure that means it's a bug. Hard to find though, I guess +at least memtest is smaller than a big OS. I think I'd dump the whole +of memory on both sides, hexdump and diff them - I'd guess it would +just be one byte/word different, maybe that would offer some idea what +wrote it. + +Dave + +> +Any idea... 
+> +> +=================md5 check patch============================= +> +> +diff --git a/Makefile.target b/Makefile.target +> +index 962d004..e2cb8e9 100644 +> +--- a/Makefile.target +> ++++ b/Makefile.target +> +@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o +> +obj-y += memory_mapping.o +> +obj-y += dump.o +> +obj-y += migration/ram.o migration/savevm.o +> +-LIBS := $(libs_softmmu) $(LIBS) +> ++LIBS := $(libs_softmmu) $(LIBS) -lplumb +> +> +# xen support +> +obj-$(CONFIG_XEN) += xen-common.o +> +diff --git a/migration/ram.c b/migration/ram.c +> +index 1eb155a..3b7a09d 100644 +> +--- a/migration/ram.c +> ++++ b/migration/ram.c +> +@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int +> +version_id) +> +} +> +> +rcu_read_unlock(); +> +- DPRINTF("Completed load of VM with exit code %d seq iteration " +> ++ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " +> +"%" PRIu64 "\n", ret, seq_iter); +> +return ret; +> +} +> +diff --git a/migration/savevm.c b/migration/savevm.c +> +index 0ad1b93..3feaa61 100644 +> +--- a/migration/savevm.c +> ++++ b/migration/savevm.c +> +@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) +> +> +} +> +> ++#include "exec/ram_addr.h" +> ++#include "qemu/rcu_queue.h" +> ++#include +> ++#ifndef MD5_DIGEST_LENGTH +> ++#define MD5_DIGEST_LENGTH 16 +> ++#endif +> ++ +> ++static void check_host_md5(void) +> ++{ +> ++ int i; +> ++ unsigned char md[MD5_DIGEST_LENGTH]; +> ++ rcu_read_lock(); +> ++ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check +> +'pc.ram' block */ +> ++ rcu_read_unlock(); +> ++ +> ++ MD5(block->host, block->used_length, md); +> ++ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { +> ++ fprintf(stderr, "%02x", md[i]); +> ++ } +> ++ fprintf(stderr, "\n"); +> ++ error_report("end ram md5"); +> ++} +> ++ +> +void qemu_savevm_state_begin(QEMUFile *f, +> +const MigrationParams *params) +> +{ +> +@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, +> +bool iterable_only) +> +save_section_header(f, se, QEMU_VM_SECTION_END); +> +> +ret = se->ops->save_live_complete_precopy(f, se->opaque); +> ++ +> ++ fprintf(stderr, "after saving %s complete\n", se->idstr); +> ++ check_host_md5(); +> ++ +> +trace_savevm_section_end(se->idstr, se->section_id, ret); +> +save_section_footer(f, se); +> +if (ret < 0) { +> +@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, +> +MigrationIncomingState *mis) +> +section_id, le->se->idstr); +> +return ret; +> +} +> ++ if (section_type == QEMU_VM_SECTION_END) { +> ++ error_report("after loading state section id %d(%s)", +> ++ section_id, le->se->idstr); +> ++ check_host_md5(); +> ++ } +> +if (!check_section_footer(f, le)) { +> +return -EINVAL; +> +} +> +@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) +> +} +> +> +cpu_synchronize_all_post_init(); +> ++ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); +> ++ check_host_md5(); +> +> +return ret; +> +} +> +> +> +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 2015/12/3 17:24, Dr. David Alan Gilbert wrote: +* Li Zhijian (address@hidden) wrote: +Hi all, + +Does anyboday remember the similar issue post by hailiang months ago +http://patchwork.ozlabs.org/patch/454322/ +At least tow bugs about migration had been fixed since that. +Yes, I wondered what happened to that. +And now we found the same issue at the tcg vm(kvm is fine), after migration, +the content VM's memory is inconsistent. 
+Hmm, TCG only - I don't know much about that; but I guess something must +be accessing memory without using the proper macros/functions so +it doesn't mark it as dirty. +we add a patch to check memory content, you can find it from affix + +steps to reporduce: +1) apply the patch and re-build qemu +2) prepare the ubuntu guest and run memtest in grub. +soruce side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off + +destination side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 + +3) start migration +with 1000M NIC, migration will finish within 3 min. + +at source: +(qemu) migrate tcp:192.168.2.66:8881 +after saving ram complete +e9e725df678d392b1a83b3a917f332bb +qemu-system-x86_64: end ram md5 +(qemu) + +at destination: +...skip... +Completed load of VM with exit code 0 seq iteration 1264 +Completed load of VM with exit code 0 seq iteration 1265 +Completed load of VM with exit code 0 seq iteration 1266 +qemu-system-x86_64: after loading state section id 2(ram) +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 +qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init + +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 + +This occurs occasionally and only at tcg machine. It seems that +some pages dirtied in source side don't transferred to destination. +This problem can be reproduced even if we disable virtio. + +Is it OK for some pages that not transferred to destination when do +migration ? Or is it a bug? +I'm pretty sure that means it's a bug. Hard to find though, I guess +at least memtest is smaller than a big OS. I think I'd dump the whole +of memory on both sides, hexdump and diff them - I'd guess it would +just be one byte/word different, maybe that would offer some idea what +wrote it. +Maybe one better way to do that is with the help of userfaultfd's write-protect +capability. It is still in the development by Andrea Arcangeli, but there +is a RFC version available, please refer to +http://www.spinics.net/lists/linux-mm/msg97422.html +(I'm developing live memory snapshot which based on it, maybe this is another +scene where we +can use userfaultfd's WP ;) ). +Dave +Any idea... 
+ +=================md5 check patch============================= + +diff --git a/Makefile.target b/Makefile.target +index 962d004..e2cb8e9 100644 +--- a/Makefile.target ++++ b/Makefile.target +@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o + obj-y += memory_mapping.o + obj-y += dump.o + obj-y += migration/ram.o migration/savevm.o +-LIBS := $(libs_softmmu) $(LIBS) ++LIBS := $(libs_softmmu) $(LIBS) -lplumb + + # xen support + obj-$(CONFIG_XEN) += xen-common.o +diff --git a/migration/ram.c b/migration/ram.c +index 1eb155a..3b7a09d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int +version_id) + } + + rcu_read_unlock(); +- DPRINTF("Completed load of VM with exit code %d seq iteration " ++ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); + return ret; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index 0ad1b93..3feaa61 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) + + } + ++#include "exec/ram_addr.h" ++#include "qemu/rcu_queue.h" ++#include ++#ifndef MD5_DIGEST_LENGTH ++#define MD5_DIGEST_LENGTH 16 ++#endif ++ ++static void check_host_md5(void) ++{ ++ int i; ++ unsigned char md[MD5_DIGEST_LENGTH]; ++ rcu_read_lock(); ++ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check +'pc.ram' block */ ++ rcu_read_unlock(); ++ ++ MD5(block->host, block->used_length, md); ++ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { ++ fprintf(stderr, "%02x", md[i]); ++ } ++ fprintf(stderr, "\n"); ++ error_report("end ram md5"); ++} ++ + void qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params) + { +@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, +bool iterable_only) + save_section_header(f, se, QEMU_VM_SECTION_END); + + ret = se->ops->save_live_complete_precopy(f, se->opaque); ++ ++ fprintf(stderr, "after saving %s complete\n", se->idstr); ++ check_host_md5(); ++ + trace_savevm_section_end(se->idstr, se->section_id, ret); + save_section_footer(f, se); + if (ret < 0) { +@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, +MigrationIncomingState *mis) + section_id, le->se->idstr); + return ret; + } ++ if (section_type == QEMU_VM_SECTION_END) { ++ error_report("after loading state section id %d(%s)", ++ section_id, le->se->idstr); ++ check_host_md5(); ++ } + if (!check_section_footer(f, le)) { + return -EINVAL; + } +@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) + } + + cpu_synchronize_all_post_init(); ++ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); ++ check_host_md5(); + + return ret; + } +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +. + +On 12/03/2015 05:37 PM, Hailiang Zhang wrote: +On 2015/12/3 17:24, Dr. David Alan Gilbert wrote: +* Li Zhijian (address@hidden) wrote: +Hi all, + +Does anyboday remember the similar issue post by hailiang months ago +http://patchwork.ozlabs.org/patch/454322/ +At least tow bugs about migration had been fixed since that. +Yes, I wondered what happened to that. +And now we found the same issue at the tcg vm(kvm is fine), after +migration, +the content VM's memory is inconsistent. +Hmm, TCG only - I don't know much about that; but I guess something must +be accessing memory without using the proper macros/functions so +it doesn't mark it as dirty. 
+we add a patch to check memory content, you can find it from affix + +steps to reporduce: +1) apply the patch and re-build qemu +2) prepare the ubuntu guest and run memtest in grub. +soruce side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 + +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off + +destination side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 + +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 + +3) start migration +with 1000M NIC, migration will finish within 3 min. + +at source: +(qemu) migrate tcp:192.168.2.66:8881 +after saving ram complete +e9e725df678d392b1a83b3a917f332bb +qemu-system-x86_64: end ram md5 +(qemu) + +at destination: +...skip... +Completed load of VM with exit code 0 seq iteration 1264 +Completed load of VM with exit code 0 seq iteration 1265 +Completed load of VM with exit code 0 seq iteration 1266 +qemu-system-x86_64: after loading state section id 2(ram) +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 +qemu-system-x86_64: qemu_loadvm_state: after +cpu_synchronize_all_post_init + +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 + +This occurs occasionally and only at tcg machine. It seems that +some pages dirtied in source side don't transferred to destination. +This problem can be reproduced even if we disable virtio. + +Is it OK for some pages that not transferred to destination when do +migration ? Or is it a bug? +I'm pretty sure that means it's a bug. Hard to find though, I guess +at least memtest is smaller than a big OS. I think I'd dump the whole +of memory on both sides, hexdump and diff them - I'd guess it would +just be one byte/word different, maybe that would offer some idea what +wrote it. +Maybe one better way to do that is with the help of userfaultfd's +write-protect +capability. It is still in the development by Andrea Arcangeli, but there +is a RFC version available, please refer to +http://www.spinics.net/lists/linux-mm/msg97422.html +(I'm developing live memory snapshot which based on it, maybe this is +another scene where we +can use userfaultfd's WP ;) ). +sounds good. + +thanks +Li +Dave +Any idea... 
+ +=================md5 check patch============================= + +diff --git a/Makefile.target b/Makefile.target +index 962d004..e2cb8e9 100644 +--- a/Makefile.target ++++ b/Makefile.target +@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o + obj-y += memory_mapping.o + obj-y += dump.o + obj-y += migration/ram.o migration/savevm.o +-LIBS := $(libs_softmmu) $(LIBS) ++LIBS := $(libs_softmmu) $(LIBS) -lplumb + + # xen support + obj-$(CONFIG_XEN) += xen-common.o +diff --git a/migration/ram.c b/migration/ram.c +index 1eb155a..3b7a09d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int +version_id) + } + + rcu_read_unlock(); +- DPRINTF("Completed load of VM with exit code %d seq iteration " ++ fprintf(stderr, "Completed load of VM with exit code %d seq +iteration " + "%" PRIu64 "\n", ret, seq_iter); + return ret; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index 0ad1b93..3feaa61 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) + + } + ++#include "exec/ram_addr.h" ++#include "qemu/rcu_queue.h" ++#include ++#ifndef MD5_DIGEST_LENGTH ++#define MD5_DIGEST_LENGTH 16 ++#endif ++ ++static void check_host_md5(void) ++{ ++ int i; ++ unsigned char md[MD5_DIGEST_LENGTH]; ++ rcu_read_lock(); ++ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check +'pc.ram' block */ ++ rcu_read_unlock(); ++ ++ MD5(block->host, block->used_length, md); ++ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { ++ fprintf(stderr, "%02x", md[i]); ++ } ++ fprintf(stderr, "\n"); ++ error_report("end ram md5"); ++} ++ + void qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params) + { +@@ -1056,6 +1079,10 @@ void +qemu_savevm_state_complete_precopy(QEMUFile *f, +bool iterable_only) + save_section_header(f, se, QEMU_VM_SECTION_END); + + ret = se->ops->save_live_complete_precopy(f, se->opaque); ++ ++ fprintf(stderr, "after saving %s complete\n", se->idstr); ++ check_host_md5(); ++ + trace_savevm_section_end(se->idstr, se->section_id, ret); + save_section_footer(f, se); + if (ret < 0) { +@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, +MigrationIncomingState *mis) + section_id, le->se->idstr); + return ret; + } ++ if (section_type == QEMU_VM_SECTION_END) { ++ error_report("after loading state section id %d(%s)", ++ section_id, le->se->idstr); ++ check_host_md5(); ++ } + if (!check_section_footer(f, le)) { + return -EINVAL; + } +@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) + } + + cpu_synchronize_all_post_init(); ++ error_report("%s: after cpu_synchronize_all_post_init\n", +__func__); ++ check_host_md5(); + + return ret; + } +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +. +. +-- +Best regards. +Li Zhijian (8555) + +On 12/03/2015 05:24 PM, Dr. David Alan Gilbert wrote: +* Li Zhijian (address@hidden) wrote: +Hi all, + +Does anyboday remember the similar issue post by hailiang months ago +http://patchwork.ozlabs.org/patch/454322/ +At least tow bugs about migration had been fixed since that. +Yes, I wondered what happened to that. +And now we found the same issue at the tcg vm(kvm is fine), after migration, +the content VM's memory is inconsistent. +Hmm, TCG only - I don't know much about that; but I guess something must +be accessing memory without using the proper macros/functions so +it doesn't mark it as dirty. 
+we add a patch to check memory content, you can find it from affix + +steps to reporduce: +1) apply the patch and re-build qemu +2) prepare the ubuntu guest and run memtest in grub. +soruce side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off + +destination side: +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 + +3) start migration +with 1000M NIC, migration will finish within 3 min. + +at source: +(qemu) migrate tcp:192.168.2.66:8881 +after saving ram complete +e9e725df678d392b1a83b3a917f332bb +qemu-system-x86_64: end ram md5 +(qemu) + +at destination: +...skip... +Completed load of VM with exit code 0 seq iteration 1264 +Completed load of VM with exit code 0 seq iteration 1265 +Completed load of VM with exit code 0 seq iteration 1266 +qemu-system-x86_64: after loading state section id 2(ram) +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 +qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init + +49c2dac7bde0e5e22db7280dcb3824f9 +qemu-system-x86_64: end ram md5 + +This occurs occasionally and only at tcg machine. It seems that +some pages dirtied in source side don't transferred to destination. +This problem can be reproduced even if we disable virtio. + +Is it OK for some pages that not transferred to destination when do +migration ? Or is it a bug? +I'm pretty sure that means it's a bug. Hard to find though, I guess +at least memtest is smaller than a big OS. I think I'd dump the whole +of memory on both sides, hexdump and diff them - I'd guess it would +just be one byte/word different, maybe that would offer some idea what +wrote it. +I try to dump and compare them, more than 10 pages are different. +in source side, they are random value rather than always 'FF' 'FB' 'EF' +'BF'... in destination. +and not all of the different pages are continuous. + +thanks +Li +Dave +Any idea... 
+ +=================md5 check patch============================= + +diff --git a/Makefile.target b/Makefile.target +index 962d004..e2cb8e9 100644 +--- a/Makefile.target ++++ b/Makefile.target +@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o + obj-y += memory_mapping.o + obj-y += dump.o + obj-y += migration/ram.o migration/savevm.o +-LIBS := $(libs_softmmu) $(LIBS) ++LIBS := $(libs_softmmu) $(LIBS) -lplumb + + # xen support + obj-$(CONFIG_XEN) += xen-common.o +diff --git a/migration/ram.c b/migration/ram.c +index 1eb155a..3b7a09d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int +version_id) + } + + rcu_read_unlock(); +- DPRINTF("Completed load of VM with exit code %d seq iteration " ++ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); + return ret; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index 0ad1b93..3feaa61 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) + + } + ++#include "exec/ram_addr.h" ++#include "qemu/rcu_queue.h" ++#include ++#ifndef MD5_DIGEST_LENGTH ++#define MD5_DIGEST_LENGTH 16 ++#endif ++ ++static void check_host_md5(void) ++{ ++ int i; ++ unsigned char md[MD5_DIGEST_LENGTH]; ++ rcu_read_lock(); ++ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check +'pc.ram' block */ ++ rcu_read_unlock(); ++ ++ MD5(block->host, block->used_length, md); ++ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { ++ fprintf(stderr, "%02x", md[i]); ++ } ++ fprintf(stderr, "\n"); ++ error_report("end ram md5"); ++} ++ + void qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params) + { +@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, +bool iterable_only) + save_section_header(f, se, QEMU_VM_SECTION_END); + + ret = se->ops->save_live_complete_precopy(f, se->opaque); ++ ++ fprintf(stderr, "after saving %s complete\n", se->idstr); ++ check_host_md5(); ++ + trace_savevm_section_end(se->idstr, se->section_id, ret); + save_section_footer(f, se); + if (ret < 0) { +@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, +MigrationIncomingState *mis) + section_id, le->se->idstr); + return ret; + } ++ if (section_type == QEMU_VM_SECTION_END) { ++ error_report("after loading state section id %d(%s)", ++ section_id, le->se->idstr); ++ check_host_md5(); ++ } + if (!check_section_footer(f, le)) { + return -EINVAL; + } +@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) + } + + cpu_synchronize_all_post_init(); ++ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); ++ check_host_md5(); + + return ret; + } +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + + +. +-- +Best regards. +Li Zhijian (8555) + +* Li Zhijian (address@hidden) wrote: +> +> +> +On 12/03/2015 05:24 PM, Dr. David Alan Gilbert wrote: +> +>* Li Zhijian (address@hidden) wrote: +> +>>Hi all, +> +>> +> +>>Does anyboday remember the similar issue post by hailiang months ago +> +>> +http://patchwork.ozlabs.org/patch/454322/ +> +>>At least tow bugs about migration had been fixed since that. +> +> +> +>Yes, I wondered what happened to that. +> +> +> +>>And now we found the same issue at the tcg vm(kvm is fine), after migration, +> +>>the content VM's memory is inconsistent. 
+> +> +> +>Hmm, TCG only - I don't know much about that; but I guess something must +> +>be accessing memory without using the proper macros/functions so +> +>it doesn't mark it as dirty. +> +> +> +>>we add a patch to check memory content, you can find it from affix +> +>> +> +>>steps to reporduce: +> +>>1) apply the patch and re-build qemu +> +>>2) prepare the ubuntu guest and run memtest in grub. +> +>>soruce side: +> +>>x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +> +>>e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +> +>>if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +> +>>virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +> +>>-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +> +>>tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +> +>>pc-i440fx-2.3,accel=tcg,usb=off +> +>> +> +>>destination side: +> +>>x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +> +>>e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +> +>>if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +> +>>virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +> +>>-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +> +>>tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +> +>>pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 +> +>> +> +>>3) start migration +> +>>with 1000M NIC, migration will finish within 3 min. +> +>> +> +>>at source: +> +>>(qemu) migrate tcp:192.168.2.66:8881 +> +>>after saving ram complete +> +>>e9e725df678d392b1a83b3a917f332bb +> +>>qemu-system-x86_64: end ram md5 +> +>>(qemu) +> +>> +> +>>at destination: +> +>>...skip... +> +>>Completed load of VM with exit code 0 seq iteration 1264 +> +>>Completed load of VM with exit code 0 seq iteration 1265 +> +>>Completed load of VM with exit code 0 seq iteration 1266 +> +>>qemu-system-x86_64: after loading state section id 2(ram) +> +>>49c2dac7bde0e5e22db7280dcb3824f9 +> +>>qemu-system-x86_64: end ram md5 +> +>>qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init +> +>> +> +>>49c2dac7bde0e5e22db7280dcb3824f9 +> +>>qemu-system-x86_64: end ram md5 +> +>> +> +>>This occurs occasionally and only at tcg machine. It seems that +> +>>some pages dirtied in source side don't transferred to destination. +> +>>This problem can be reproduced even if we disable virtio. +> +>> +> +>>Is it OK for some pages that not transferred to destination when do +> +>>migration ? Or is it a bug? +> +> +> +>I'm pretty sure that means it's a bug. Hard to find though, I guess +> +>at least memtest is smaller than a big OS. I think I'd dump the whole +> +>of memory on both sides, hexdump and diff them - I'd guess it would +> +>just be one byte/word different, maybe that would offer some idea what +> +>wrote it. +> +> +I try to dump and compare them, more than 10 pages are different. +> +in source side, they are random value rather than always 'FF' 'FB' 'EF' +> +'BF'... in destination. +> +> +and not all of the different pages are continuous. +I wonder if it happens on all of memtest's different test patterns, +perhaps it might be possible to narrow it down if you tell memtest +to only run one test at a time. + +Dave + +> +> +thanks +> +Li +> +> +> +> +> +>Dave +> +> +> +>>Any idea... 
+> +>> +> +>>=================md5 check patch============================= +> +>> +> +>>diff --git a/Makefile.target b/Makefile.target +> +>>index 962d004..e2cb8e9 100644 +> +>>--- a/Makefile.target +> +>>+++ b/Makefile.target +> +>>@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o +> +>> obj-y += memory_mapping.o +> +>> obj-y += dump.o +> +>> obj-y += migration/ram.o migration/savevm.o +> +>>-LIBS := $(libs_softmmu) $(LIBS) +> +>>+LIBS := $(libs_softmmu) $(LIBS) -lplumb +> +>> +> +>> # xen support +> +>> obj-$(CONFIG_XEN) += xen-common.o +> +>>diff --git a/migration/ram.c b/migration/ram.c +> +>>index 1eb155a..3b7a09d 100644 +> +>>--- a/migration/ram.c +> +>>+++ b/migration/ram.c +> +>>@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, int +> +>>version_id) +> +>> } +> +>> +> +>> rcu_read_unlock(); +> +>>- DPRINTF("Completed load of VM with exit code %d seq iteration " +> +>>+ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " +> +>> "%" PRIu64 "\n", ret, seq_iter); +> +>> return ret; +> +>> } +> +>>diff --git a/migration/savevm.c b/migration/savevm.c +> +>>index 0ad1b93..3feaa61 100644 +> +>>--- a/migration/savevm.c +> +>>+++ b/migration/savevm.c +> +>>@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) +> +>> +> +>> } +> +>> +> +>>+#include "exec/ram_addr.h" +> +>>+#include "qemu/rcu_queue.h" +> +>>+#include +> +>>+#ifndef MD5_DIGEST_LENGTH +> +>>+#define MD5_DIGEST_LENGTH 16 +> +>>+#endif +> +>>+ +> +>>+static void check_host_md5(void) +> +>>+{ +> +>>+ int i; +> +>>+ unsigned char md[MD5_DIGEST_LENGTH]; +> +>>+ rcu_read_lock(); +> +>>+ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check +> +>>'pc.ram' block */ +> +>>+ rcu_read_unlock(); +> +>>+ +> +>>+ MD5(block->host, block->used_length, md); +> +>>+ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { +> +>>+ fprintf(stderr, "%02x", md[i]); +> +>>+ } +> +>>+ fprintf(stderr, "\n"); +> +>>+ error_report("end ram md5"); +> +>>+} +> +>>+ +> +>> void qemu_savevm_state_begin(QEMUFile *f, +> +>> const MigrationParams *params) +> +>> { +> +>>@@ -1056,6 +1079,10 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, +> +>>bool iterable_only) +> +>> save_section_header(f, se, QEMU_VM_SECTION_END); +> +>> +> +>> ret = se->ops->save_live_complete_precopy(f, se->opaque); +> +>>+ +> +>>+ fprintf(stderr, "after saving %s complete\n", se->idstr); +> +>>+ check_host_md5(); +> +>>+ +> +>> trace_savevm_section_end(se->idstr, se->section_id, ret); +> +>> save_section_footer(f, se); +> +>> if (ret < 0) { +> +>>@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, +> +>>MigrationIncomingState *mis) +> +>> section_id, le->se->idstr); +> +>> return ret; +> +>> } +> +>>+ if (section_type == QEMU_VM_SECTION_END) { +> +>>+ error_report("after loading state section id %d(%s)", +> +>>+ section_id, le->se->idstr); +> +>>+ check_host_md5(); +> +>>+ } +> +>> if (!check_section_footer(f, le)) { +> +>> return -EINVAL; +> +>> } +> +>>@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) +> +>> } +> +>> +> +>> cpu_synchronize_all_post_init(); +> +>>+ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); +> +>>+ check_host_md5(); +> +>> +> +>> return ret; +> +>> } +> +>> +> +>> +> +>> +> +>-- +> +>Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> +> +> +> +>. +> +> +> +> +-- +> +Best regards. +> +Li Zhijian (8555) +> +> +-- +Dr. 
David Alan Gilbert / address@hidden / Manchester, UK + +Li Zhijian wrote: +> +Hi all, +> +> +Does anyboday remember the similar issue post by hailiang months ago +> +http://patchwork.ozlabs.org/patch/454322/ +> +At least tow bugs about migration had been fixed since that. +> +> +And now we found the same issue at the tcg vm(kvm is fine), after +> +migration, the content VM's memory is inconsistent. +> +> +we add a patch to check memory content, you can find it from affix +> +> +steps to reporduce: +> +1) apply the patch and re-build qemu +> +2) prepare the ubuntu guest and run memtest in grub. +> +soruce side: +> +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +> +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +> +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +> +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +> +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +> +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +> +pc-i440fx-2.3,accel=tcg,usb=off +> +> +destination side: +> +x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hn0 -device +> +e1000,id=net-pci0,netdev=hn0,mac=52:54:00:12:34:65 -boot c -drive +> +if=none,file=/home/lizj/ubuntu.raw,id=drive-virtio-disk0 -device +> +virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 +> +-vnc :7 -m 128 -smp 1 -device piix3-usb-uhci -device usb-tablet -qmp +> +tcp::4444,server,nowait -monitor stdio -cpu qemu64 -machine +> +pc-i440fx-2.3,accel=tcg,usb=off -incoming tcp:0:8881 +> +> +3) start migration +> +with 1000M NIC, migration will finish within 3 min. +> +> +at source: +> +(qemu) migrate tcp:192.168.2.66:8881 +> +after saving ram complete +> +e9e725df678d392b1a83b3a917f332bb +> +qemu-system-x86_64: end ram md5 +> +(qemu) +> +> +at destination: +> +...skip... +> +Completed load of VM with exit code 0 seq iteration 1264 +> +Completed load of VM with exit code 0 seq iteration 1265 +> +Completed load of VM with exit code 0 seq iteration 1266 +> +qemu-system-x86_64: after loading state section id 2(ram) +> +49c2dac7bde0e5e22db7280dcb3824f9 +> +qemu-system-x86_64: end ram md5 +> +qemu-system-x86_64: qemu_loadvm_state: after cpu_synchronize_all_post_init +> +> +49c2dac7bde0e5e22db7280dcb3824f9 +> +qemu-system-x86_64: end ram md5 +> +> +This occurs occasionally and only at tcg machine. It seems that +> +some pages dirtied in source side don't transferred to destination. +> +This problem can be reproduced even if we disable virtio. +> +> +Is it OK for some pages that not transferred to destination when do +> +migration ? Or is it a bug? +> +> +Any idea... +Thanks for describing how to reproduce the bug. +If some pages are not transferred to destination then it is a bug, so we +need to know what the problem is, notice that the problem can be that +TCG is not marking dirty some page, that Migration code "forgets" about +that page, or anything eles altogether, that is what we need to find. + +There are more posibilities, I am not sure that memtest is on 32bit +mode, and it is inside posibility that we are missing some state when we +are on real mode. + +Will try to take a look at this. + +THanks, again. 
+ + +> +> +=================md5 check patch============================= +> +> +diff --git a/Makefile.target b/Makefile.target +> +index 962d004..e2cb8e9 100644 +> +--- a/Makefile.target +> ++++ b/Makefile.target +> +@@ -139,7 +139,7 @@ obj-y += memory.o cputlb.o +> +obj-y += memory_mapping.o +> +obj-y += dump.o +> +obj-y += migration/ram.o migration/savevm.o +> +-LIBS := $(libs_softmmu) $(LIBS) +> ++LIBS := $(libs_softmmu) $(LIBS) -lplumb +> +> +# xen support +> +obj-$(CONFIG_XEN) += xen-common.o +> +diff --git a/migration/ram.c b/migration/ram.c +> +index 1eb155a..3b7a09d 100644 +> +--- a/migration/ram.c +> ++++ b/migration/ram.c +> +@@ -2513,7 +2513,7 @@ static int ram_load(QEMUFile *f, void *opaque, +> +int version_id) +> +} +> +> +rcu_read_unlock(); +> +- DPRINTF("Completed load of VM with exit code %d seq iteration " +> ++ fprintf(stderr, "Completed load of VM with exit code %d seq iteration " +> +"%" PRIu64 "\n", ret, seq_iter); +> +return ret; +> +} +> +diff --git a/migration/savevm.c b/migration/savevm.c +> +index 0ad1b93..3feaa61 100644 +> +--- a/migration/savevm.c +> ++++ b/migration/savevm.c +> +@@ -891,6 +891,29 @@ void qemu_savevm_state_header(QEMUFile *f) +> +> +} +> +> ++#include "exec/ram_addr.h" +> ++#include "qemu/rcu_queue.h" +> ++#include +> ++#ifndef MD5_DIGEST_LENGTH +> ++#define MD5_DIGEST_LENGTH 16 +> ++#endif +> ++ +> ++static void check_host_md5(void) +> ++{ +> ++ int i; +> ++ unsigned char md[MD5_DIGEST_LENGTH]; +> ++ rcu_read_lock(); +> ++ RAMBlock *block = QLIST_FIRST_RCU(&ram_list.blocks);/* Only check +> +'pc.ram' block */ +> ++ rcu_read_unlock(); +> ++ +> ++ MD5(block->host, block->used_length, md); +> ++ for(i = 0; i < MD5_DIGEST_LENGTH; i++) { +> ++ fprintf(stderr, "%02x", md[i]); +> ++ } +> ++ fprintf(stderr, "\n"); +> ++ error_report("end ram md5"); +> ++} +> ++ +> +void qemu_savevm_state_begin(QEMUFile *f, +> +const MigrationParams *params) +> +{ +> +@@ -1056,6 +1079,10 @@ void +> +qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) +> +save_section_header(f, se, QEMU_VM_SECTION_END); +> +> +ret = se->ops->save_live_complete_precopy(f, se->opaque); +> ++ +> ++ fprintf(stderr, "after saving %s complete\n", se->idstr); +> ++ check_host_md5(); +> ++ +> +trace_savevm_section_end(se->idstr, se->section_id, ret); +> +save_section_footer(f, se); +> +if (ret < 0) { +> +@@ -1791,6 +1818,11 @@ static int qemu_loadvm_state_main(QEMUFile *f, +> +MigrationIncomingState *mis) +> +section_id, le->se->idstr); +> +return ret; +> +} +> ++ if (section_type == QEMU_VM_SECTION_END) { +> ++ error_report("after loading state section id %d(%s)", +> ++ section_id, le->se->idstr); +> ++ check_host_md5(); +> ++ } +> +if (!check_section_footer(f, le)) { +> +return -EINVAL; +> +} +> +@@ -1901,6 +1933,8 @@ int qemu_loadvm_state(QEMUFile *f) +> +} +> +> +cpu_synchronize_all_post_init(); +> ++ error_report("%s: after cpu_synchronize_all_post_init\n", __func__); +> ++ check_host_md5(); +> +> +return ret; +> +} + +> +> +Thanks for describing how to reproduce the bug. +> +If some pages are not transferred to destination then it is a bug, so we need +> +to know what the problem is, notice that the problem can be that TCG is not +> +marking dirty some page, that Migration code "forgets" about that page, or +> +anything eles altogether, that is what we need to find. +> +> +There are more posibilities, I am not sure that memtest is on 32bit mode, and +> +it is inside posibility that we are missing some state when we are on real +> +mode. 
+> +> +Will try to take a look at this. +> +> +THanks, again. +> +Hi Juan & Amit + + Do you think we should add a mechanism to check the data integrity during LM +like Zhijian's patch did? it may be very helpful for developers. + Actually, I did the similar thing before in order to make sure that I did the +right thing we I change the code related to LM. + +Liang + +On (Fri) 04 Dec 2015 [01:43:07], Li, Liang Z wrote: +> +> +> +> Thanks for describing how to reproduce the bug. +> +> If some pages are not transferred to destination then it is a bug, so we +> +> need +> +> to know what the problem is, notice that the problem can be that TCG is not +> +> marking dirty some page, that Migration code "forgets" about that page, or +> +> anything eles altogether, that is what we need to find. +> +> +> +> There are more posibilities, I am not sure that memtest is on 32bit mode, +> +> and +> +> it is inside posibility that we are missing some state when we are on real +> +> mode. +> +> +> +> Will try to take a look at this. +> +> +> +> THanks, again. +> +> +> +> +Hi Juan & Amit +> +> +Do you think we should add a mechanism to check the data integrity during LM +> +like Zhijian's patch did? it may be very helpful for developers. +> +Actually, I did the similar thing before in order to make sure that I did +> +the right thing we I change the code related to LM. +If you mean for debugging, something that's not always on, then I'm +fine with it. + +A script that goes along that shows the result of comparison of the +diff will be helpful too, something that shows how many pages are +differnt, how many bytes in a page on average, and so on. + + Amit + diff --git a/classification_output/01/mistranslation/74545755 b/classification_output/01/mistranslation/74545755 new file mode 100644 index 000000000..32d247ac7 --- /dev/null +++ b/classification_output/01/mistranslation/74545755 @@ -0,0 +1,344 @@ +mistranslation: 0.752 +instruction: 0.700 +other: 0.683 +semantic: 0.669 + +[Bug Report][RFC PATCH 0/1] block: fix failing assert on paused VM migration + +There's a bug (failing assert) which is reproduced during migration of +a paused VM. I am able to reproduce it on a stand with 2 nodes and a common +NFS share, with VM's disk on that share. + +root@fedora40-1-vm:~# virsh domblklist alma8-vm + Target Source +------------------------------------------ + sda /mnt/shared/images/alma8.qcow2 + +root@fedora40-1-vm:~# df -Th /mnt/shared +Filesystem Type Size Used Avail Use% Mounted on +127.0.0.1:/srv/nfsd nfs4 63G 16G 48G 25% /mnt/shared + +On the 1st node: + +root@fedora40-1-vm:~# virsh start alma8-vm ; virsh suspend alma8-vm +root@fedora40-1-vm:~# virsh migrate --compressed --p2p --persistent +--undefinesource --live alma8-vm qemu+ssh://fedora40-2-vm/system + +Then on the 2nd node: + +root@fedora40-2-vm:~# virsh migrate --compressed --p2p --persistent +--undefinesource --live alma8-vm qemu+ssh://fedora40-1-vm/system +error: operation failed: domain is not running + +root@fedora40-2-vm:~# tail -3 /var/log/libvirt/qemu/alma8-vm.log +2024-09-19 13:53:33.336+0000: initiating migration +qemu-system-x86_64: ../block.c:6976: int +bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & +BDRV_O_INACTIVE)' failed. 
+2024-09-19 13:53:42.991+0000: shutting down, reason=crashed + +Backtrace: + +(gdb) bt +#0 0x00007f7eaa2f1664 in __pthread_kill_implementation () at /lib64/libc.so.6 +#1 0x00007f7eaa298c4e in raise () at /lib64/libc.so.6 +#2 0x00007f7eaa280902 in abort () at /lib64/libc.so.6 +#3 0x00007f7eaa28081e in __assert_fail_base.cold () at /lib64/libc.so.6 +#4 0x00007f7eaa290d87 in __assert_fail () at /lib64/libc.so.6 +#5 0x0000563c38b95eb8 in bdrv_inactivate_recurse (bs=0x563c3b6c60c0) at +../block.c:6976 +#6 0x0000563c38b95aeb in bdrv_inactivate_all () at ../block.c:7038 +#7 0x0000563c3884d354 in qemu_savevm_state_complete_precopy_non_iterable +(f=0x563c3b700c20, in_postcopy=false, inactivate_disks=true) + at ../migration/savevm.c:1571 +#8 0x0000563c3884dc1a in qemu_savevm_state_complete_precopy (f=0x563c3b700c20, +iterable_only=false, inactivate_disks=true) at ../migration/savevm.c:1631 +#9 0x0000563c3883a340 in migration_completion_precopy (s=0x563c3b4d51f0, +current_active_state=) at ../migration/migration.c:2780 +#10 migration_completion (s=0x563c3b4d51f0) at ../migration/migration.c:2844 +#11 migration_iteration_run (s=0x563c3b4d51f0) at ../migration/migration.c:3270 +#12 migration_thread (opaque=0x563c3b4d51f0) at ../migration/migration.c:3536 +#13 0x0000563c38dbcf14 in qemu_thread_start (args=0x563c3c2d5bf0) at +../util/qemu-thread-posix.c:541 +#14 0x00007f7eaa2ef6d7 in start_thread () at /lib64/libc.so.6 +#15 0x00007f7eaa373414 in clone () at /lib64/libc.so.6 + +What happens here is that after 1st migration BDS related to HDD remains +inactive as VM is still paused. Then when we initiate 2nd migration, +bdrv_inactivate_all() leads to the attempt to set BDRV_O_INACTIVE flag +on that node which is already set, thus assert fails. + +Attached patch which simply skips setting flag if it's already set is more +of a kludge than a clean solution. Should we use more sophisticated logic +which allows some of the nodes be in inactive state prior to the migration, +and takes them into account during bdrv_inactivate_all()? Comments would +be appreciated. + +Andrey + +Andrey Drobyshev (1): + block: do not fail when inactivating node which is inactive + + block.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +-- +2.39.3 + +Instead of throwing an assert let's just ignore that flag is already set +and return. We assume that it's going to be safe to ignore. Otherwise +this assert fails when migrating a paused VM back and forth. + +Ideally we'd like to have a more sophisticated solution, e.g. not even +scan the nodes which should be inactive at this point. + +Signed-off-by: Andrey Drobyshev +--- + block.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 7d90007cae..c1dcf906d1 100644 +--- a/block.c ++++ b/block.c +@@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK +bdrv_inactivate_recurse(BlockDriverState *bs) + return 0; + } + +- assert(!(bs->open_flags & BDRV_O_INACTIVE)); ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ /* ++ * Return here instead of throwing assert as a workaround to ++ * prevent failure on migrating paused VM. ++ * Here we assume that if we're trying to inactivate BDS that's ++ * already inactive, it's safe to just ignore it. ++ */ ++ return 0; ++ } + + /* Inactivate this node */ + if (bs->drv->bdrv_inactivate) { +-- +2.39.3 + +[add migration maintainers] + +On 24.09.24 15:56, Andrey Drobyshev wrote: +Instead of throwing an assert let's just ignore that flag is already set +and return. 
We assume that it's going to be safe to ignore. Otherwise +this assert fails when migrating a paused VM back and forth. + +Ideally we'd like to have a more sophisticated solution, e.g. not even +scan the nodes which should be inactive at this point. + +Signed-off-by: Andrey Drobyshev +--- + block.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 7d90007cae..c1dcf906d1 100644 +--- a/block.c ++++ b/block.c +@@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK +bdrv_inactivate_recurse(BlockDriverState *bs) + return 0; + } +- assert(!(bs->open_flags & BDRV_O_INACTIVE)); ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ /* ++ * Return here instead of throwing assert as a workaround to ++ * prevent failure on migrating paused VM. ++ * Here we assume that if we're trying to inactivate BDS that's ++ * already inactive, it's safe to just ignore it. ++ */ ++ return 0; ++ } +/* Inactivate this node */ +if (bs->drv->bdrv_inactivate) { +I doubt that this a correct way to go. + +As far as I understand, "inactive" actually means that "storage is not belong to +qemu, but to someone else (another qemu process for example), and may be changed +transparently". In turn this means that Qemu should do nothing with inactive disks. So the +problem is that nobody called bdrv_activate_all on target, and we shouldn't ignore that. + +Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(), +but only in some scenarios. May be, the condition should be less strict here. + +Why we need any condition here at all? Don't we want to activate block-layer on +target after migration anyway? + +-- +Best regards, +Vladimir + +On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote: +> +[add migration maintainers] +> +> +On 24.09.24 15:56, Andrey Drobyshev wrote: +> +> [...] +> +> +I doubt that this a correct way to go. +> +> +As far as I understand, "inactive" actually means that "storage is not +> +belong to qemu, but to someone else (another qemu process for example), +> +and may be changed transparently". In turn this means that Qemu should +> +do nothing with inactive disks. So the problem is that nobody called +> +bdrv_activate_all on target, and we shouldn't ignore that. +> +> +Hmm, I see in process_incoming_migration_bh() we do call +> +bdrv_activate_all(), but only in some scenarios. May be, the condition +> +should be less strict here. +> +> +Why we need any condition here at all? Don't we want to activate +> +block-layer on target after migration anyway? +> +Hmm I'm not sure about the unconditional activation, since we at least +have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it +in such a case). In current libvirt upstream I see such code: + +> +/* Migration capabilities which should always be enabled as long as they +> +> +* are supported by QEMU. If the capability is supposed to be enabled on both +> +> +* sides of migration, it won't be enabled unless both sides support it. +> +> +*/ +> +> +static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = +> +{ +> +> +{QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER, +> +> +QEMU_MIGRATION_SOURCE}, +> +> +> +> +{QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE, +> +> +QEMU_MIGRATION_DESTINATION}, +> +> +}; +which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set. 
+ +The code from process_incoming_migration_bh() you're referring to: + +> +/* If capability late_block_activate is set: +> +> +* Only fire up the block code now if we're going to restart the +> +> +* VM, else 'cont' will do it. +> +> +* This causes file locking to happen; so we don't want it to happen +> +> +* unless we really are starting the VM. +> +> +*/ +> +> +if (!migrate_late_block_activate() || +> +> +(autostart && (!global_state_received() || +> +> +runstate_is_live(global_state_get_runstate())))) { +> +> +/* Make sure all file formats throw away their mutable metadata. +> +> +> +* If we get an error here, just don't restart the VM yet. */ +> +> +bdrv_activate_all(&local_err); +> +> +if (local_err) { +> +> +error_report_err(local_err); +> +> +local_err = NULL; +> +> +autostart = false; +> +> +} +> +> +} +It states explicitly that we're either going to start VM right at this +point if (autostart == true), or we wait till "cont" command happens. +None of this is going to happen if we start another migration while +still being in PAUSED state. So I think it seems reasonable to take +such case into account. For instance, this patch does prevent the crash: + +> +diff --git a/migration/migration.c b/migration/migration.c +> +index ae2be31557..3222f6745b 100644 +> +--- a/migration/migration.c +> ++++ b/migration/migration.c +> +@@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque) +> +*/ +> +if (!migrate_late_block_activate() || +> +(autostart && (!global_state_received() || +> +- runstate_is_live(global_state_get_runstate())))) { +> ++ runstate_is_live(global_state_get_runstate()))) || +> ++ (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) { +> +/* Make sure all file formats throw away their mutable metadata. +> +* If we get an error here, just don't restart the VM yet. */ +> +bdrv_activate_all(&local_err); +What are your thoughts on it? + +Andrey + diff --git a/classification_output/01/mistranslation/7711787 b/classification_output/01/mistranslation/7711787 deleted file mode 100644 index ead1f32fd..000000000 --- a/classification_output/01/mistranslation/7711787 +++ /dev/null @@ -1,165 +0,0 @@ -mistranslation: 0.915 -semantic: 0.904 -instruction: 0.888 -other: 0.813 - -[BUG] cxl,i386: e820 mappings may not be correct for cxl - -Context included below from prior discussion - - `cxl create-region` would fail on inability to allocate memory - - traced this down to the memory region being marked RESERVED - - E820 map marks the CXL fixed memory window as RESERVED - - -Re: x86 errors, I found that region worked with this patch. (I also -added the SRAT patches the Davidlohr posted, but I do not think they are -relevant). - -I don't think this is correct, and setting this to E820_RAM causes the -system to fail to boot at all, but with this change `cxl create-region` -succeeds, which suggests our e820 mappings in the i386 machine are -incorrect. - -Anyone who can help or have an idea as to what e820 should actually be -doing with this region, or if this is correct and something else is -failing, please help! 
- - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 566accf7e6..a5e688a742 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1077,7 +1077,7 @@ void pc_memory_init(PCMachineState *pcms, - memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, - "cxl-fixed-memory-region", fw->size); - memory_region_add_subregion(system_memory, fw->base, &fw->mr); -- e820_add_entry(fw->base, fw->size, E820_RESERVED); -+ e820_add_entry(fw->base, fw->size, E820_NVS); - cxl_fmw_base += fw->size; - cxl_resv_end = cxl_fmw_base; - } - - -On Mon, Oct 10, 2022 at 05:32:42PM +0100, Jonathan Cameron wrote: -> -> -> > but i'm not sure of what to do with this info. We have some proof -> -> > that real hardware works with this no problem, and the only difference -> -> > is that the EFI/bios/firmware is setting the memory regions as `usable` -> -> > or `soft reserved`, which would imply the EDK2 is the blocker here -> -> > regardless of the OS driver status. -> -> > -> -> > But I'd seen elsewhere you had gotten some of this working, and I'm -> -> > failing to get anything working at the moment. If you have any input i -> -> > would greatly appreciate the help. -> -> > -> -> > QEMU config: -> -> > -> -> > /opt/qemu-cxl2/bin/qemu-system-x86_64 \ -> -> > -drive -> -> > file=/var/lib/libvirt/images/cxl.qcow2,format=qcow2,index=0,media=d\ -> -> > -m 2G,slots=4,maxmem=4G \ -> -> > -smp 4 \ -> -> > -machine type=q35,accel=kvm,cxl=on \ -> -> > -enable-kvm \ -> -> > -nographic \ -> -> > -device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 \ -> -> > -device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 \ -> -> > -object memory-backend-file,id=cxl-mem0,mem-path=/tmp/cxl-mem0,size=256M \ -> -> > -object memory-backend-file,id=lsa0,mem-path=/tmp/cxl-lsa0,size=256M \ -> -> > -device cxl-type3,bus=rp0,pmem=true,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 -> -> > \ -> -> > -M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=256M -> -> > -> -> > I'd seen on the lists that you had seen issues with single-rp setups, -> -> > but no combination of configuration I've tried (including all the ones -> -> > in the docs and tests) lead to a successful region creation with -> -> > `cxl create-region` -> -> -> -> Hmm. Let me have a play. I've not run x86 tests for a while so -> -> perhaps something is missing there. -> -> -> -> I'm carrying a patch to override check_last_peer() in -> -> cxl_port_setup_targets() as that is wrong for some combinations, -> -> but that doesn't look like it's related to what you are seeing. -> -> -I'm not sure if it's relevant, but turned out I'd forgotten I'm carrying 3 -> -patches that aren't upstream (and one is a horrible hack). -> -> -Hack: -https://lore.kernel.org/linux-cxl/20220819094655.000005ed@huawei.com/ -> -Shouldn't affect a simple case like this... -> -> -https://lore.kernel.org/linux-cxl/20220819093133.00006c22@huawei.com/T/#t -> -(Dan's version) -> -> -https://lore.kernel.org/linux-cxl/20220815154044.24733-1-Jonathan.Cameron@huawei.com/T/#t -> -> -For writes to work you will currently need two rps (nothing on the second is -> -fine) -> -as we still haven't resolved if the kernel should support an HDM decoder on -> -a host bridge with one port. I think it should (Spec allows it), others -> -unconvinced. -> -> -Note I haven't shifted over to x86 yet so may still be something different -> -from -> -arm64. 
-> -> -Jonathan -> -> - diff --git a/classification_output/01/mistranslation/80604314 b/classification_output/01/mistranslation/80604314 new file mode 100644 index 000000000..798c2e866 --- /dev/null +++ b/classification_output/01/mistranslation/80604314 @@ -0,0 +1,1480 @@ +mistranslation: 0.922 +other: 0.898 +semantic: 0.890 +instruction: 0.877 + +[BUG] vhost-vdpa: qemu-system-s390x crashes with second virtio-net-ccw device + +When I start qemu with a second virtio-net-ccw device (i.e. adding +-device virtio-net-ccw in addition to the autogenerated device), I get +a segfault. gdb points to + +#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, + config=0x55d6ad9e3f80 "RT") at /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + +(backtrace doesn't go further) + +Starting qemu with no additional "-device virtio-net-ccw" (i.e., only +the autogenerated virtio-net-ccw device is present) works. Specifying +several "-device virtio-net-pci" works as well. + +Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net +client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") +works (in-between state does not compile). + +This is reproducible with tcg as well. Same problem both with +--enable-vhost-vdpa and --disable-vhost-vdpa. + +Have not yet tried to figure out what might be special with +virtio-ccw... anyone have an idea? + +[This should probably be considered a blocker?] + +On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +> +When I start qemu with a second virtio-net-ccw device (i.e. adding +> +-device virtio-net-ccw in addition to the autogenerated device), I get +> +a segfault. gdb points to +> +> +#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, +> +config=0x55d6ad9e3f80 "RT") at +> +/home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { +> +> +(backtrace doesn't go further) +> +> +Starting qemu with no additional "-device virtio-net-ccw" (i.e., only +> +the autogenerated virtio-net-ccw device is present) works. Specifying +> +several "-device virtio-net-pci" works as well. +> +> +Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net +> +client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") +> +works (in-between state does not compile). +Ouch. I didn't test all in-between states :( +But I wish we had a 0-day instrastructure like kernel has, +that catches things like that. + +> +This is reproducible with tcg as well. Same problem both with +> +--enable-vhost-vdpa and --disable-vhost-vdpa. +> +> +Have not yet tried to figure out what might be special with +> +virtio-ccw... anyone have an idea? +> +> +[This should probably be considered a blocker?] + +On Fri, 24 Jul 2020 09:30:58 -0400 +"Michael S. Tsirkin" wrote: + +> +On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +> +> When I start qemu with a second virtio-net-ccw device (i.e. adding +> +> -device virtio-net-ccw in addition to the autogenerated device), I get +> +> a segfault. gdb points to +> +> +> +> #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, +> +> config=0x55d6ad9e3f80 "RT") at +> +> /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +> 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { +> +> +> +> (backtrace doesn't go further) +The core was incomplete, but running under gdb directly shows that it +is just a bog-standard config space access (first for that device). 
+ +The cause of the crash is that nc->peer is not set... no idea how that +can happen, not that familiar with that part of QEMU. (Should the code +check, or is that really something that should not happen?) + +What I don't understand is why it is set correctly for the first, +autogenerated virtio-net-ccw device, but not for the second one, and +why virtio-net-pci doesn't show these problems. The only difference +between -ccw and -pci that comes to my mind here is that config space +accesses for ccw are done via an asynchronous operation, so timing +might be different. + +> +> +> +> Starting qemu with no additional "-device virtio-net-ccw" (i.e., only +> +> the autogenerated virtio-net-ccw device is present) works. Specifying +> +> several "-device virtio-net-pci" works as well. +> +> +> +> Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net +> +> client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") +> +> works (in-between state does not compile). +> +> +Ouch. I didn't test all in-between states :( +> +But I wish we had a 0-day instrastructure like kernel has, +> +that catches things like that. +Yep, that would be useful... so patchew only builds the complete series? + +> +> +> This is reproducible with tcg as well. Same problem both with +> +> --enable-vhost-vdpa and --disable-vhost-vdpa. +> +> +> +> Have not yet tried to figure out what might be special with +> +> virtio-ccw... anyone have an idea? +> +> +> +> [This should probably be considered a blocker?] +I think so, as it makes s390x unusable with more that one +virtio-net-ccw device, and I don't even see a workaround. + +On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +> +On Fri, 24 Jul 2020 09:30:58 -0400 +> +"Michael S. Tsirkin" wrote: +> +> +> On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +> +> > When I start qemu with a second virtio-net-ccw device (i.e. adding +> +> > -device virtio-net-ccw in addition to the autogenerated device), I get +> +> > a segfault. gdb points to +> +> > +> +> > #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, +> +> > config=0x55d6ad9e3f80 "RT") at +> +> > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +> > 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { +> +> > +> +> > (backtrace doesn't go further) +> +> +The core was incomplete, but running under gdb directly shows that it +> +is just a bog-standard config space access (first for that device). +> +> +The cause of the crash is that nc->peer is not set... no idea how that +> +can happen, not that familiar with that part of QEMU. (Should the code +> +check, or is that really something that should not happen?) +> +> +What I don't understand is why it is set correctly for the first, +> +autogenerated virtio-net-ccw device, but not for the second one, and +> +why virtio-net-pci doesn't show these problems. The only difference +> +between -ccw and -pci that comes to my mind here is that config space +> +accesses for ccw are done via an asynchronous operation, so timing +> +might be different. +Hopefully Jason has an idea. Could you post a full command line +please? Do you need a working guest to trigger this? Does this trigger +on an x86 host? + +> +> > +> +> > Starting qemu with no additional "-device virtio-net-ccw" (i.e., only +> +> > the autogenerated virtio-net-ccw device is present) works. Specifying +> +> > several "-device virtio-net-pci" works as well. 
+> +> > +> +> > Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net +> +> > client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") +> +> > works (in-between state does not compile). +> +> +> +> Ouch. I didn't test all in-between states :( +> +> But I wish we had a 0-day instrastructure like kernel has, +> +> that catches things like that. +> +> +Yep, that would be useful... so patchew only builds the complete series? +> +> +> +> +> > This is reproducible with tcg as well. Same problem both with +> +> > --enable-vhost-vdpa and --disable-vhost-vdpa. +> +> > +> +> > Have not yet tried to figure out what might be special with +> +> > virtio-ccw... anyone have an idea? +> +> > +> +> > [This should probably be considered a blocker?] +> +> +I think so, as it makes s390x unusable with more that one +> +virtio-net-ccw device, and I don't even see a workaround. + +On Fri, 24 Jul 2020 11:17:57 -0400 +"Michael S. Tsirkin" wrote: + +> +On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +> +> On Fri, 24 Jul 2020 09:30:58 -0400 +> +> "Michael S. Tsirkin" wrote: +> +> +> +> > On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +> +> > > When I start qemu with a second virtio-net-ccw device (i.e. adding +> +> > > -device virtio-net-ccw in addition to the autogenerated device), I get +> +> > > a segfault. gdb points to +> +> > > +> +> > > #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, +> +> > > config=0x55d6ad9e3f80 "RT") at +> +> > > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +> > > 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { +> +> > > +> +> > > (backtrace doesn't go further) +> +> +> +> The core was incomplete, but running under gdb directly shows that it +> +> is just a bog-standard config space access (first for that device). +> +> +> +> The cause of the crash is that nc->peer is not set... no idea how that +> +> can happen, not that familiar with that part of QEMU. (Should the code +> +> check, or is that really something that should not happen?) +> +> +> +> What I don't understand is why it is set correctly for the first, +> +> autogenerated virtio-net-ccw device, but not for the second one, and +> +> why virtio-net-pci doesn't show these problems. The only difference +> +> between -ccw and -pci that comes to my mind here is that config space +> +> accesses for ccw are done via an asynchronous operation, so timing +> +> might be different. +> +> +Hopefully Jason has an idea. Could you post a full command line +> +please? Do you need a working guest to trigger this? Does this trigger +> +on an x86 host? +Yes, it does trigger with tcg-on-x86 as well. I've been using + +s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on +-m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +-drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +-device +scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 + +-device virtio-net-ccw + +It seems it needs the guest actually doing something with the nics; I +cannot reproduce the crash if I use the old advent calendar moon buggy +image and just add a virtio-net-ccw device. + +(I don't think it's a problem with my local build, as I see the problem +both on my laptop and on an LPAR.) + +> +> +> > > +> +> > > Starting qemu with no additional "-device virtio-net-ccw" (i.e., only +> +> > > the autogenerated virtio-net-ccw device is present) works. 
Specifying +> +> > > several "-device virtio-net-pci" works as well. +> +> > > +> +> > > Things break with 1e0a84ea49b6 ("vhost-vdpa: introduce vhost-vdpa net +> +> > > client"), 38140cc4d971 ("vhost_net: introduce set_config & get_config") +> +> > > works (in-between state does not compile). +> +> > +> +> > Ouch. I didn't test all in-between states :( +> +> > But I wish we had a 0-day instrastructure like kernel has, +> +> > that catches things like that. +> +> +> +> Yep, that would be useful... so patchew only builds the complete series? +> +> +> +> > +> +> > > This is reproducible with tcg as well. Same problem both with +> +> > > --enable-vhost-vdpa and --disable-vhost-vdpa. +> +> > > +> +> > > Have not yet tried to figure out what might be special with +> +> > > virtio-ccw... anyone have an idea? +> +> > > +> +> > > [This should probably be considered a blocker?] +> +> +> +> I think so, as it makes s390x unusable with more that one +> +> virtio-net-ccw device, and I don't even see a workaround. +> + +On 2020/7/24 下午11:34, Cornelia Huck wrote: +On Fri, 24 Jul 2020 11:17:57 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +On Fri, 24 Jul 2020 09:30:58 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +When I start qemu with a second virtio-net-ccw device (i.e. adding +-device virtio-net-ccw in addition to the autogenerated device), I get +a segfault. gdb points to + +#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, + config=0x55d6ad9e3f80 "RT") at +/home/cohuck/git/qemu/hw/net/virtio-net.c:146 +146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + +(backtrace doesn't go further) +The core was incomplete, but running under gdb directly shows that it +is just a bog-standard config space access (first for that device). + +The cause of the crash is that nc->peer is not set... no idea how that +can happen, not that familiar with that part of QEMU. (Should the code +check, or is that really something that should not happen?) + +What I don't understand is why it is set correctly for the first, +autogenerated virtio-net-ccw device, but not for the second one, and +why virtio-net-pci doesn't show these problems. The only difference +between -ccw and -pci that comes to my mind here is that config space +accesses for ccw are done via an asynchronous operation, so timing +might be different. +Hopefully Jason has an idea. Could you post a full command line +please? Do you need a working guest to trigger this? Does this trigger +on an x86 host? +Yes, it does trigger with tcg-on-x86 as well. I've been using + +s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on +-m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +-drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +-device +scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +-device virtio-net-ccw + +It seems it needs the guest actually doing something with the nics; I +cannot reproduce the crash if I use the old advent calendar moon buggy +image and just add a virtio-net-ccw device. + +(I don't think it's a problem with my local build, as I see the problem +both on my laptop and on an LPAR.) +It looks to me we forget the check the existence of peer. + +Please try the attached patch to see if it works. 
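+
+(The attachment itself is not inlined in this archive. Judging from its file
+name and the quoted crash at virtio-net.c:146, the proposed guard is presumably
+along the lines of the following sketch, which is illustrative only and not the
+actual attachment. In hw/net/virtio-net.c, virtio_net_get_config():
+
+-    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
++    /* nc->peer may be NULL when the device has no backend attached. */
++    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+
+i.e. check that a peer exists before dereferencing it.)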
+ +Thanks +0001-virtio-net-check-the-existence-of-peer-before-accesi.patch +Description: +Text Data + +On Sat, 25 Jul 2020 08:40:07 +0800 +Jason Wang wrote: + +> +On 2020/7/24 下午11:34, Cornelia Huck wrote: +> +> On Fri, 24 Jul 2020 11:17:57 -0400 +> +> "Michael S. Tsirkin" wrote: +> +> +> +>> On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +> +>>> On Fri, 24 Jul 2020 09:30:58 -0400 +> +>>> "Michael S. Tsirkin" wrote: +> +>>> +> +>>>> On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +> +>>>>> When I start qemu with a second virtio-net-ccw device (i.e. adding +> +>>>>> -device virtio-net-ccw in addition to the autogenerated device), I get +> +>>>>> a segfault. gdb points to +> +>>>>> +> +>>>>> #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, +> +>>>>> config=0x55d6ad9e3f80 "RT") at +> +>>>>> /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +>>>>> 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { +> +>>>>> +> +>>>>> (backtrace doesn't go further) +> +>>> The core was incomplete, but running under gdb directly shows that it +> +>>> is just a bog-standard config space access (first for that device). +> +>>> +> +>>> The cause of the crash is that nc->peer is not set... no idea how that +> +>>> can happen, not that familiar with that part of QEMU. (Should the code +> +>>> check, or is that really something that should not happen?) +> +>>> +> +>>> What I don't understand is why it is set correctly for the first, +> +>>> autogenerated virtio-net-ccw device, but not for the second one, and +> +>>> why virtio-net-pci doesn't show these problems. The only difference +> +>>> between -ccw and -pci that comes to my mind here is that config space +> +>>> accesses for ccw are done via an asynchronous operation, so timing +> +>>> might be different. +> +>> Hopefully Jason has an idea. Could you post a full command line +> +>> please? Do you need a working guest to trigger this? Does this trigger +> +>> on an x86 host? +> +> Yes, it does trigger with tcg-on-x86 as well. I've been using +> +> +> +> s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu +> +> qemu,zpci=on +> +> -m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +> +> -drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +> +> -device +> +> scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +> +> -device virtio-net-ccw +> +> +> +> It seems it needs the guest actually doing something with the nics; I +> +> cannot reproduce the crash if I use the old advent calendar moon buggy +> +> image and just add a virtio-net-ccw device. +> +> +> +> (I don't think it's a problem with my local build, as I see the problem +> +> both on my laptop and on an LPAR.) +> +> +> +It looks to me we forget the check the existence of peer. +> +> +Please try the attached patch to see if it works. +Thanks, that patch gets my guest up and running again. So, FWIW, + +Tested-by: Cornelia Huck + +Any idea why this did not hit with virtio-net-pci (or the autogenerated +virtio-net-ccw device)? + +On 2020/7/27 下午2:43, Cornelia Huck wrote: +On Sat, 25 Jul 2020 08:40:07 +0800 +Jason Wang wrote: +On 2020/7/24 下午11:34, Cornelia Huck wrote: +On Fri, 24 Jul 2020 11:17:57 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +On Fri, 24 Jul 2020 09:30:58 -0400 +"Michael S. 
Tsirkin" wrote: +On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +When I start qemu with a second virtio-net-ccw device (i.e. adding +-device virtio-net-ccw in addition to the autogenerated device), I get +a segfault. gdb points to + +#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, + config=0x55d6ad9e3f80 "RT") at +/home/cohuck/git/qemu/hw/net/virtio-net.c:146 +146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + +(backtrace doesn't go further) +The core was incomplete, but running under gdb directly shows that it +is just a bog-standard config space access (first for that device). + +The cause of the crash is that nc->peer is not set... no idea how that +can happen, not that familiar with that part of QEMU. (Should the code +check, or is that really something that should not happen?) + +What I don't understand is why it is set correctly for the first, +autogenerated virtio-net-ccw device, but not for the second one, and +why virtio-net-pci doesn't show these problems. The only difference +between -ccw and -pci that comes to my mind here is that config space +accesses for ccw are done via an asynchronous operation, so timing +might be different. +Hopefully Jason has an idea. Could you post a full command line +please? Do you need a working guest to trigger this? Does this trigger +on an x86 host? +Yes, it does trigger with tcg-on-x86 as well. I've been using + +s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on +-m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +-drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +-device +scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +-device virtio-net-ccw + +It seems it needs the guest actually doing something with the nics; I +cannot reproduce the crash if I use the old advent calendar moon buggy +image and just add a virtio-net-ccw device. + +(I don't think it's a problem with my local build, as I see the problem +both on my laptop and on an LPAR.) +It looks to me we forget the check the existence of peer. + +Please try the attached patch to see if it works. +Thanks, that patch gets my guest up and running again. So, FWIW, + +Tested-by: Cornelia Huck + +Any idea why this did not hit with virtio-net-pci (or the autogenerated +virtio-net-ccw device)? +It can be hit with virtio-net-pci as well (just start without peer). +For autogenerated virtio-net-cww, I think the reason is that it has +already had a peer set. +Thanks + +On Mon, 27 Jul 2020 15:38:12 +0800 +Jason Wang wrote: + +> +On 2020/7/27 下午2:43, Cornelia Huck wrote: +> +> On Sat, 25 Jul 2020 08:40:07 +0800 +> +> Jason Wang wrote: +> +> +> +>> On 2020/7/24 下午11:34, Cornelia Huck wrote: +> +>>> On Fri, 24 Jul 2020 11:17:57 -0400 +> +>>> "Michael S. Tsirkin" wrote: +> +>>> +> +>>>> On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +> +>>>>> On Fri, 24 Jul 2020 09:30:58 -0400 +> +>>>>> "Michael S. Tsirkin" wrote: +> +>>>>> +> +>>>>>> On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +> +>>>>>>> When I start qemu with a second virtio-net-ccw device (i.e. adding +> +>>>>>>> -device virtio-net-ccw in addition to the autogenerated device), I get +> +>>>>>>> a segfault. 
gdb points to +> +>>>>>>> +> +>>>>>>> #0 0x000055d6ab52681d in virtio_net_get_config (vdev=, +> +>>>>>>> config=0x55d6ad9e3f80 "RT") at +> +>>>>>>> /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +>>>>>>> 146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { +> +>>>>>>> +> +>>>>>>> (backtrace doesn't go further) +> +>>>>> The core was incomplete, but running under gdb directly shows that it +> +>>>>> is just a bog-standard config space access (first for that device). +> +>>>>> +> +>>>>> The cause of the crash is that nc->peer is not set... no idea how that +> +>>>>> can happen, not that familiar with that part of QEMU. (Should the code +> +>>>>> check, or is that really something that should not happen?) +> +>>>>> +> +>>>>> What I don't understand is why it is set correctly for the first, +> +>>>>> autogenerated virtio-net-ccw device, but not for the second one, and +> +>>>>> why virtio-net-pci doesn't show these problems. The only difference +> +>>>>> between -ccw and -pci that comes to my mind here is that config space +> +>>>>> accesses for ccw are done via an asynchronous operation, so timing +> +>>>>> might be different. +> +>>>> Hopefully Jason has an idea. Could you post a full command line +> +>>>> please? Do you need a working guest to trigger this? Does this trigger +> +>>>> on an x86 host? +> +>>> Yes, it does trigger with tcg-on-x86 as well. I've been using +> +>>> +> +>>> s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu +> +>>> qemu,zpci=on +> +>>> -m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +> +>>> -drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +> +>>> -device +> +>>> scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +> +>>> -device virtio-net-ccw +> +>>> +> +>>> It seems it needs the guest actually doing something with the nics; I +> +>>> cannot reproduce the crash if I use the old advent calendar moon buggy +> +>>> image and just add a virtio-net-ccw device. +> +>>> +> +>>> (I don't think it's a problem with my local build, as I see the problem +> +>>> both on my laptop and on an LPAR.) +> +>> +> +>> It looks to me we forget the check the existence of peer. +> +>> +> +>> Please try the attached patch to see if it works. +> +> Thanks, that patch gets my guest up and running again. So, FWIW, +> +> +> +> Tested-by: Cornelia Huck +> +> +> +> Any idea why this did not hit with virtio-net-pci (or the autogenerated +> +> virtio-net-ccw device)? +> +> +> +It can be hit with virtio-net-pci as well (just start without peer). +Hm, I had not been able to reproduce the crash with a 'naked' -device +virtio-net-pci. But checking seems to be the right idea anyway. + +> +> +For autogenerated virtio-net-cww, I think the reason is that it has +> +already had a peer set. +Ok, that might well be. + +On 2020/7/27 下午4:41, Cornelia Huck wrote: +On Mon, 27 Jul 2020 15:38:12 +0800 +Jason Wang wrote: +On 2020/7/27 下午2:43, Cornelia Huck wrote: +On Sat, 25 Jul 2020 08:40:07 +0800 +Jason Wang wrote: +On 2020/7/24 下午11:34, Cornelia Huck wrote: +On Fri, 24 Jul 2020 11:17:57 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +On Fri, 24 Jul 2020 09:30:58 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +When I start qemu with a second virtio-net-ccw device (i.e. adding +-device virtio-net-ccw in addition to the autogenerated device), I get +a segfault. 
gdb points to + +#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, + config=0x55d6ad9e3f80 "RT") at +/home/cohuck/git/qemu/hw/net/virtio-net.c:146 +146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + +(backtrace doesn't go further) +The core was incomplete, but running under gdb directly shows that it +is just a bog-standard config space access (first for that device). + +The cause of the crash is that nc->peer is not set... no idea how that +can happen, not that familiar with that part of QEMU. (Should the code +check, or is that really something that should not happen?) + +What I don't understand is why it is set correctly for the first, +autogenerated virtio-net-ccw device, but not for the second one, and +why virtio-net-pci doesn't show these problems. The only difference +between -ccw and -pci that comes to my mind here is that config space +accesses for ccw are done via an asynchronous operation, so timing +might be different. +Hopefully Jason has an idea. Could you post a full command line +please? Do you need a working guest to trigger this? Does this trigger +on an x86 host? +Yes, it does trigger with tcg-on-x86 as well. I've been using + +s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on +-m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +-drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +-device +scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +-device virtio-net-ccw + +It seems it needs the guest actually doing something with the nics; I +cannot reproduce the crash if I use the old advent calendar moon buggy +image and just add a virtio-net-ccw device. + +(I don't think it's a problem with my local build, as I see the problem +both on my laptop and on an LPAR.) +It looks to me we forget the check the existence of peer. + +Please try the attached patch to see if it works. +Thanks, that patch gets my guest up and running again. So, FWIW, + +Tested-by: Cornelia Huck + +Any idea why this did not hit with virtio-net-pci (or the autogenerated +virtio-net-ccw device)? +It can be hit with virtio-net-pci as well (just start without peer). +Hm, I had not been able to reproduce the crash with a 'naked' -device +virtio-net-pci. But checking seems to be the right idea anyway. +Sorry for being unclear, I meant for networking part, you just need +start without peer, and you need a real guest (any Linux) that is trying +to access the config space of virtio-net. +Thanks +For autogenerated virtio-net-cww, I think the reason is that it has +already had a peer set. +Ok, that might well be. + +On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: +> +> +On 2020/7/27 下午4:41, Cornelia Huck wrote: +> +> On Mon, 27 Jul 2020 15:38:12 +0800 +> +> Jason Wang wrote: +> +> +> +> > On 2020/7/27 下午2:43, Cornelia Huck wrote: +> +> > > On Sat, 25 Jul 2020 08:40:07 +0800 +> +> > > Jason Wang wrote: +> +> > > > On 2020/7/24 下午11:34, Cornelia Huck wrote: +> +> > > > > On Fri, 24 Jul 2020 11:17:57 -0400 +> +> > > > > "Michael S. Tsirkin" wrote: +> +> > > > > > On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +> +> > > > > > > On Fri, 24 Jul 2020 09:30:58 -0400 +> +> > > > > > > "Michael S. Tsirkin" wrote: +> +> > > > > > > > On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +> +> > > > > > > > > When I start qemu with a second virtio-net-ccw device (i.e. 
+> +> > > > > > > > > adding +> +> > > > > > > > > -device virtio-net-ccw in addition to the autogenerated +> +> > > > > > > > > device), I get +> +> > > > > > > > > a segfault. gdb points to +> +> > > > > > > > > +> +> > > > > > > > > #0 0x000055d6ab52681d in virtio_net_get_config +> +> > > > > > > > > (vdev=, +> +> > > > > > > > > config=0x55d6ad9e3f80 "RT") at +> +> > > > > > > > > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +> > > > > > > > > 146 if (nc->peer->info->type == +> +> > > > > > > > > NET_CLIENT_DRIVER_VHOST_VDPA) { +> +> > > > > > > > > +> +> > > > > > > > > (backtrace doesn't go further) +> +> > > > > > > The core was incomplete, but running under gdb directly shows +> +> > > > > > > that it +> +> > > > > > > is just a bog-standard config space access (first for that +> +> > > > > > > device). +> +> > > > > > > +> +> > > > > > > The cause of the crash is that nc->peer is not set... no idea +> +> > > > > > > how that +> +> > > > > > > can happen, not that familiar with that part of QEMU. (Should +> +> > > > > > > the code +> +> > > > > > > check, or is that really something that should not happen?) +> +> > > > > > > +> +> > > > > > > What I don't understand is why it is set correctly for the +> +> > > > > > > first, +> +> > > > > > > autogenerated virtio-net-ccw device, but not for the second +> +> > > > > > > one, and +> +> > > > > > > why virtio-net-pci doesn't show these problems. The only +> +> > > > > > > difference +> +> > > > > > > between -ccw and -pci that comes to my mind here is that config +> +> > > > > > > space +> +> > > > > > > accesses for ccw are done via an asynchronous operation, so +> +> > > > > > > timing +> +> > > > > > > might be different. +> +> > > > > > Hopefully Jason has an idea. Could you post a full command line +> +> > > > > > please? Do you need a working guest to trigger this? Does this +> +> > > > > > trigger +> +> > > > > > on an x86 host? +> +> > > > > Yes, it does trigger with tcg-on-x86 as well. I've been using +> +> > > > > +> +> > > > > s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu +> +> > > > > qemu,zpci=on +> +> > > > > -m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +> +> > > > > -drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +> +> > > > > -device +> +> > > > > scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +> +> > > > > -device virtio-net-ccw +> +> > > > > +> +> > > > > It seems it needs the guest actually doing something with the nics; +> +> > > > > I +> +> > > > > cannot reproduce the crash if I use the old advent calendar moon +> +> > > > > buggy +> +> > > > > image and just add a virtio-net-ccw device. +> +> > > > > +> +> > > > > (I don't think it's a problem with my local build, as I see the +> +> > > > > problem +> +> > > > > both on my laptop and on an LPAR.) +> +> > > > It looks to me we forget the check the existence of peer. +> +> > > > +> +> > > > Please try the attached patch to see if it works. +> +> > > Thanks, that patch gets my guest up and running again. So, FWIW, +> +> > > +> +> > > Tested-by: Cornelia Huck +> +> > > +> +> > > Any idea why this did not hit with virtio-net-pci (or the autogenerated +> +> > > virtio-net-ccw device)? +> +> > +> +> > It can be hit with virtio-net-pci as well (just start without peer). +> +> Hm, I had not been able to reproduce the crash with a 'naked' -device +> +> virtio-net-pci. But checking seems to be the right idea anyway. 
+> +> +> +Sorry for being unclear, I meant for networking part, you just need start +> +without peer, and you need a real guest (any Linux) that is trying to access +> +the config space of virtio-net. +> +> +Thanks +A pxe guest will do it, but that doesn't support ccw, right? + +I'm still unclear why this triggers with ccw but not pci - +any idea? + +> +> +> +> +> > For autogenerated virtio-net-cww, I think the reason is that it has +> +> > already had a peer set. +> +> Ok, that might well be. +> +> +> +> + +On 2020/7/27 下午7:43, Michael S. Tsirkin wrote: +On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: +On 2020/7/27 下午4:41, Cornelia Huck wrote: +On Mon, 27 Jul 2020 15:38:12 +0800 +Jason Wang wrote: +On 2020/7/27 下午2:43, Cornelia Huck wrote: +On Sat, 25 Jul 2020 08:40:07 +0800 +Jason Wang wrote: +On 2020/7/24 下午11:34, Cornelia Huck wrote: +On Fri, 24 Jul 2020 11:17:57 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +On Fri, 24 Jul 2020 09:30:58 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +When I start qemu with a second virtio-net-ccw device (i.e. adding +-device virtio-net-ccw in addition to the autogenerated device), I get +a segfault. gdb points to + +#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, + config=0x55d6ad9e3f80 "RT") at +/home/cohuck/git/qemu/hw/net/virtio-net.c:146 +146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + +(backtrace doesn't go further) +The core was incomplete, but running under gdb directly shows that it +is just a bog-standard config space access (first for that device). + +The cause of the crash is that nc->peer is not set... no idea how that +can happen, not that familiar with that part of QEMU. (Should the code +check, or is that really something that should not happen?) + +What I don't understand is why it is set correctly for the first, +autogenerated virtio-net-ccw device, but not for the second one, and +why virtio-net-pci doesn't show these problems. The only difference +between -ccw and -pci that comes to my mind here is that config space +accesses for ccw are done via an asynchronous operation, so timing +might be different. +Hopefully Jason has an idea. Could you post a full command line +please? Do you need a working guest to trigger this? Does this trigger +on an x86 host? +Yes, it does trigger with tcg-on-x86 as well. I've been using + +s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on +-m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +-drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +-device +scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +-device virtio-net-ccw + +It seems it needs the guest actually doing something with the nics; I +cannot reproduce the crash if I use the old advent calendar moon buggy +image and just add a virtio-net-ccw device. + +(I don't think it's a problem with my local build, as I see the problem +both on my laptop and on an LPAR.) +It looks to me we forget the check the existence of peer. + +Please try the attached patch to see if it works. +Thanks, that patch gets my guest up and running again. So, FWIW, + +Tested-by: Cornelia Huck + +Any idea why this did not hit with virtio-net-pci (or the autogenerated +virtio-net-ccw device)? +It can be hit with virtio-net-pci as well (just start without peer). 
+Hm, I had not been able to reproduce the crash with a 'naked' -device +virtio-net-pci. But checking seems to be the right idea anyway. +Sorry for being unclear, I meant for networking part, you just need start +without peer, and you need a real guest (any Linux) that is trying to access +the config space of virtio-net. + +Thanks +A pxe guest will do it, but that doesn't support ccw, right? +Yes, it depends on the cli actually. +I'm still unclear why this triggers with ccw but not pci - +any idea? +I don't test pxe but I can reproduce this with pci (just start a linux +guest without a peer). +Thanks + +On Mon, Jul 27, 2020 at 08:44:09PM +0800, Jason Wang wrote: +> +> +On 2020/7/27 下午7:43, Michael S. Tsirkin wrote: +> +> On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: +> +> > On 2020/7/27 下午4:41, Cornelia Huck wrote: +> +> > > On Mon, 27 Jul 2020 15:38:12 +0800 +> +> > > Jason Wang wrote: +> +> > > +> +> > > > On 2020/7/27 下午2:43, Cornelia Huck wrote: +> +> > > > > On Sat, 25 Jul 2020 08:40:07 +0800 +> +> > > > > Jason Wang wrote: +> +> > > > > > On 2020/7/24 下午11:34, Cornelia Huck wrote: +> +> > > > > > > On Fri, 24 Jul 2020 11:17:57 -0400 +> +> > > > > > > "Michael S. Tsirkin" wrote: +> +> > > > > > > > On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +> +> > > > > > > > > On Fri, 24 Jul 2020 09:30:58 -0400 +> +> > > > > > > > > "Michael S. Tsirkin" wrote: +> +> > > > > > > > > > On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck +> +> > > > > > > > > > wrote: +> +> > > > > > > > > > > When I start qemu with a second virtio-net-ccw device +> +> > > > > > > > > > > (i.e. adding +> +> > > > > > > > > > > -device virtio-net-ccw in addition to the autogenerated +> +> > > > > > > > > > > device), I get +> +> > > > > > > > > > > a segfault. gdb points to +> +> > > > > > > > > > > +> +> > > > > > > > > > > #0 0x000055d6ab52681d in virtio_net_get_config +> +> > > > > > > > > > > (vdev=, +> +> > > > > > > > > > > config=0x55d6ad9e3f80 "RT") at +> +> > > > > > > > > > > /home/cohuck/git/qemu/hw/net/virtio-net.c:146 +> +> > > > > > > > > > > 146 if (nc->peer->info->type == +> +> > > > > > > > > > > NET_CLIENT_DRIVER_VHOST_VDPA) { +> +> > > > > > > > > > > +> +> > > > > > > > > > > (backtrace doesn't go further) +> +> > > > > > > > > The core was incomplete, but running under gdb directly +> +> > > > > > > > > shows that it +> +> > > > > > > > > is just a bog-standard config space access (first for that +> +> > > > > > > > > device). +> +> > > > > > > > > +> +> > > > > > > > > The cause of the crash is that nc->peer is not set... no +> +> > > > > > > > > idea how that +> +> > > > > > > > > can happen, not that familiar with that part of QEMU. +> +> > > > > > > > > (Should the code +> +> > > > > > > > > check, or is that really something that should not happen?) +> +> > > > > > > > > +> +> > > > > > > > > What I don't understand is why it is set correctly for the +> +> > > > > > > > > first, +> +> > > > > > > > > autogenerated virtio-net-ccw device, but not for the second +> +> > > > > > > > > one, and +> +> > > > > > > > > why virtio-net-pci doesn't show these problems. The only +> +> > > > > > > > > difference +> +> > > > > > > > > between -ccw and -pci that comes to my mind here is that +> +> > > > > > > > > config space +> +> > > > > > > > > accesses for ccw are done via an asynchronous operation, so +> +> > > > > > > > > timing +> +> > > > > > > > > might be different. +> +> > > > > > > > Hopefully Jason has an idea. 
Could you post a full command +> +> > > > > > > > line +> +> > > > > > > > please? Do you need a working guest to trigger this? Does +> +> > > > > > > > this trigger +> +> > > > > > > > on an x86 host? +> +> > > > > > > Yes, it does trigger with tcg-on-x86 as well. I've been using +> +> > > > > > > +> +> > > > > > > s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg +> +> > > > > > > -cpu qemu,zpci=on +> +> > > > > > > -m 1024 -nographic -device +> +> > > > > > > virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +> +> > > > > > > -drive +> +> > > > > > > file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +> +> > > > > > > -device +> +> > > > > > > scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +> +> > > > > > > -device virtio-net-ccw +> +> > > > > > > +> +> > > > > > > It seems it needs the guest actually doing something with the +> +> > > > > > > nics; I +> +> > > > > > > cannot reproduce the crash if I use the old advent calendar +> +> > > > > > > moon buggy +> +> > > > > > > image and just add a virtio-net-ccw device. +> +> > > > > > > +> +> > > > > > > (I don't think it's a problem with my local build, as I see the +> +> > > > > > > problem +> +> > > > > > > both on my laptop and on an LPAR.) +> +> > > > > > It looks to me we forget the check the existence of peer. +> +> > > > > > +> +> > > > > > Please try the attached patch to see if it works. +> +> > > > > Thanks, that patch gets my guest up and running again. So, FWIW, +> +> > > > > +> +> > > > > Tested-by: Cornelia Huck +> +> > > > > +> +> > > > > Any idea why this did not hit with virtio-net-pci (or the +> +> > > > > autogenerated +> +> > > > > virtio-net-ccw device)? +> +> > > > It can be hit with virtio-net-pci as well (just start without peer). +> +> > > Hm, I had not been able to reproduce the crash with a 'naked' -device +> +> > > virtio-net-pci. But checking seems to be the right idea anyway. +> +> > Sorry for being unclear, I meant for networking part, you just need start +> +> > without peer, and you need a real guest (any Linux) that is trying to +> +> > access +> +> > the config space of virtio-net. +> +> > +> +> > Thanks +> +> A pxe guest will do it, but that doesn't support ccw, right? +> +> +> +Yes, it depends on the cli actually. +> +> +> +> +> +> I'm still unclear why this triggers with ccw but not pci - +> +> any idea? +> +> +> +I don't test pxe but I can reproduce this with pci (just start a linux guest +> +without a peer). +> +> +Thanks +> +Might be a good addition to a unit test. Not sure what would the +test do exactly: just make sure guest runs? Looks like a lot of work +for an empty test ... maybe we can poke at the guest config with +qtest commands at least. + +-- +MST + +On 2020/7/27 下午9:16, Michael S. Tsirkin wrote: +On Mon, Jul 27, 2020 at 08:44:09PM +0800, Jason Wang wrote: +On 2020/7/27 下午7:43, Michael S. Tsirkin wrote: +On Mon, Jul 27, 2020 at 04:51:23PM +0800, Jason Wang wrote: +On 2020/7/27 下午4:41, Cornelia Huck wrote: +On Mon, 27 Jul 2020 15:38:12 +0800 +Jason Wang wrote: +On 2020/7/27 下午2:43, Cornelia Huck wrote: +On Sat, 25 Jul 2020 08:40:07 +0800 +Jason Wang wrote: +On 2020/7/24 下午11:34, Cornelia Huck wrote: +On Fri, 24 Jul 2020 11:17:57 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 04:56:27PM +0200, Cornelia Huck wrote: +On Fri, 24 Jul 2020 09:30:58 -0400 +"Michael S. Tsirkin" wrote: +On Fri, Jul 24, 2020 at 03:27:18PM +0200, Cornelia Huck wrote: +When I start qemu with a second virtio-net-ccw device (i.e. 
adding +-device virtio-net-ccw in addition to the autogenerated device), I get +a segfault. gdb points to + +#0 0x000055d6ab52681d in virtio_net_get_config (vdev=, + config=0x55d6ad9e3f80 "RT") at +/home/cohuck/git/qemu/hw/net/virtio-net.c:146 +146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + +(backtrace doesn't go further) +The core was incomplete, but running under gdb directly shows that it +is just a bog-standard config space access (first for that device). + +The cause of the crash is that nc->peer is not set... no idea how that +can happen, not that familiar with that part of QEMU. (Should the code +check, or is that really something that should not happen?) + +What I don't understand is why it is set correctly for the first, +autogenerated virtio-net-ccw device, but not for the second one, and +why virtio-net-pci doesn't show these problems. The only difference +between -ccw and -pci that comes to my mind here is that config space +accesses for ccw are done via an asynchronous operation, so timing +might be different. +Hopefully Jason has an idea. Could you post a full command line +please? Do you need a working guest to trigger this? Does this trigger +on an x86 host? +Yes, it does trigger with tcg-on-x86 as well. I've been using + +s390x-softmmu/qemu-system-s390x -M s390-ccw-virtio,accel=tcg -cpu qemu,zpci=on +-m 1024 -nographic -device virtio-scsi-ccw,id=scsi0,devno=fe.0.0001 +-drive file=/path/to/image,format=qcow2,if=none,id=drive-scsi0-0-0-0 +-device +scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 +-device virtio-net-ccw + +It seems it needs the guest actually doing something with the nics; I +cannot reproduce the crash if I use the old advent calendar moon buggy +image and just add a virtio-net-ccw device. + +(I don't think it's a problem with my local build, as I see the problem +both on my laptop and on an LPAR.) +It looks to me we forget the check the existence of peer. + +Please try the attached patch to see if it works. +Thanks, that patch gets my guest up and running again. So, FWIW, + +Tested-by: Cornelia Huck + +Any idea why this did not hit with virtio-net-pci (or the autogenerated +virtio-net-ccw device)? +It can be hit with virtio-net-pci as well (just start without peer). +Hm, I had not been able to reproduce the crash with a 'naked' -device +virtio-net-pci. But checking seems to be the right idea anyway. +Sorry for being unclear, I meant for networking part, you just need start +without peer, and you need a real guest (any Linux) that is trying to access +the config space of virtio-net. + +Thanks +A pxe guest will do it, but that doesn't support ccw, right? +Yes, it depends on the cli actually. +I'm still unclear why this triggers with ccw but not pci - +any idea? +I don't test pxe but I can reproduce this with pci (just start a linux guest +without a peer). + +Thanks +Might be a good addition to a unit test. Not sure what would the +test do exactly: just make sure guest runs? Looks like a lot of work +for an empty test ... maybe we can poke at the guest config with +qtest commands at least. +That should work or we can simply extend the exist virtio-net qtest to +do that. 
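+
+(For reference, short of a dedicated qtest the scenario can be exercised by
+hand: as noted above, the crash needs a guest driver that actually touches the
+NIC's config space, and any Linux guest with a backend-less device will do.
+An illustrative command line, with the disk image path as a placeholder:
+
+qemu-system-x86_64 -m 1024 -nographic \
+    -drive file=/path/to/guest.qcow2,format=qcow2,if=virtio \
+    -device virtio-net-pci
+
+Once the guest's virtio-net driver probes the device and reads its config
+space, the unpatched virtio_net_get_config() dereferences the missing peer.)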
+Thanks + diff --git a/classification_output/01/mistranslation/80615920 b/classification_output/01/mistranslation/80615920 new file mode 100644 index 000000000..97712c2f5 --- /dev/null +++ b/classification_output/01/mistranslation/80615920 @@ -0,0 +1,348 @@ +mistranslation: 0.800 +other: 0.786 +instruction: 0.751 +semantic: 0.737 + +[BUG] accel/tcg: cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu) + +It seems there is a bug in SIGALRM handling when 486 system emulates x86_64 +code. + +This code: + +#include +#include +#include +#include +#include + +pthread_t thread1, thread2; + +// Signal handler for SIGALRM +void alarm_handler(int sig) { + // Do nothing, just wake up the other thread +} + +// Thread 1 function +void* thread1_func(void* arg) { + // Set up the signal handler for SIGALRM + signal(SIGALRM, alarm_handler); + + // Wait for 5 seconds + sleep(1); + + // Send SIGALRM signal to thread 2 + pthread_kill(thread2, SIGALRM); + + return NULL; +} + +// Thread 2 function +void* thread2_func(void* arg) { + // Wait for the SIGALRM signal + pause(); + + printf("Thread 2 woke up!\n"); + + return NULL; +} + +int main() { + // Create thread 1 + if (pthread_create(&thread1, NULL, thread1_func, NULL) != 0) { + fprintf(stderr, "Failed to create thread 1\n"); + return 1; + } + + // Create thread 2 + if (pthread_create(&thread2, NULL, thread2_func, NULL) != 0) { + fprintf(stderr, "Failed to create thread 2\n"); + return 1; + } + + // Wait for both threads to finish + pthread_join(thread1, NULL); + pthread_join(thread2, NULL); + + return 0; +} + + +Fails with this -strace log (there are also unsupported syscalls 334 and 435, +but it seems it doesn't affect the code much): + +... +736 rt_sigaction(SIGALRM,0x000000001123ec20,0x000000001123ecc0) = 0 +736 clock_nanosleep(CLOCK_REALTIME,0,{tv_sec = 1,tv_nsec = 0},{tv_sec = +1,tv_nsec = 0}) +736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x0000000010800b38,8) = 0 +736 Unknown syscall 435 +736 +clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID| + ... +736 rt_sigprocmask(SIG_SETMASK,0x0000000010800b38,NULL,8) +736 set_robust_list(0x11a419a0,0) = -1 errno=38 (Function not implemented) +736 rt_sigprocmask(SIG_SETMASK,0x0000000011a41fb0,NULL,8) = 0 + = 0 +736 pause(0,0,2,277186368,0,295966400) +736 +futex(0x000000001123f990,FUTEX_CLOCK_REALTIME|FUTEX_WAIT_BITSET,738,NULL,NULL,0) + = 0 +736 rt_sigprocmask(SIG_BLOCK,0x00000000109fad20,0x000000001123ee88,8) = 0 +736 getpid() = 736 +736 tgkill(736,739,SIGALRM) = 0 + = -1 errno=4 (Interrupted system call) +--- SIGALRM {si_signo=SIGALRM, si_code=SI_TKILL, si_pid=736, si_uid=0} --- +0x48874a != 0x3c69e10 +736 rt_sigprocmask(SIG_SETMASK,0x000000001123ee88,NULL,8) = 0 +** +ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: +(cpu == current_cpu) +Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion +failed: (cpu == current_cpu) +0x48874a != 0x3c69e10 +** +ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: +(cpu == current_cpu) +Bail out! 
ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion +failed: (cpu == current_cpu) +# + +The code fails either with or without -singlestep, the command line: + +/usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin + +Source code of QEMU 8.1.1 was modified with patch "[PATCH] qemu/timer: Don't +use RDTSC on i486" [1], +with added few ioctls (not relevant) and cpu_exec_longjmp_cleanup() now prints +current pointers of +cpu and current_cpu (line "0x48874a != 0x3c69e10"). + +config.log (built as a part of buildroot, basically the minimal possible +configuration for running x86_64 on 486): + +# Configured with: +'/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/build/qemu-8.1.1/configure' + '--prefix=/usr' +'--cross-prefix=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/i486-buildroot-linux-gnu-' + '--audio-drv-list=' +'--python=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/python3' + +'--ninja=/mnt/hd_8tb_p1/p1/home/crossgen/buildroot_486_2/output/host/bin/ninja' +'--disable-alsa' '--disable-bpf' '--disable-brlapi' '--disable-bsd-user' +'--disable-cap-ng' '--disable-capstone' '--disable-containers' +'--disable-coreaudio' '--disable-curl' '--disable-curses' +'--disable-dbus-display' '--disable-docs' '--disable-dsound' '--disable-hvf' +'--disable-jack' '--disable-libiscsi' '--disable-linux-aio' +'--disable-linux-io-uring' '--disable-malloc-trim' '--disable-membarrier' +'--disable-mpath' '--disable-netmap' '--disable-opengl' '--disable-oss' +'--disable-pa' '--disable-rbd' '--disable-sanitizers' '--disable-selinux' +'--disable-sparse' '--disable-strip' '--disable-vde' '--disable-vhost-crypto' +'--disable-vhost-user-blk-server' '--disable-virtfs' '--disable-whpx' +'--disable-xen' '--disable-attr' '--disable-kvm' '--disable-vhost-net' +'--disable-download' '--disable-hexagon-idef-parser' '--disable-system' +'--enable-linux-user' '--target-list=x86_64-linux-user' '--disable-vhost-user' +'--disable-slirp' '--disable-sdl' '--disable-fdt' '--enable-trace-backends=nop' +'--disable-tools' '--disable-guest-agent' '--disable-fuse' +'--disable-fuse-lseek' '--disable-seccomp' '--disable-libssh' +'--disable-libusb' '--disable-vnc' '--disable-nettle' '--disable-numa' +'--disable-pipewire' '--disable-spice' '--disable-usb-redir' +'--disable-install-blobs' + +Emulation of the same x86_64 code with qemu 6.2.0 installed on another x86_64 +native machine works fine. + +[1] +https://lists.nongnu.org/archive/html/qemu-devel/2023-11/msg05387.html +Best regards, +Petr + +On Sat, 25 Nov 2023 at 13:09, Petr Cvek wrote: +> +> +It seems there is a bug in SIGALRM handling when 486 system emulates x86_64 +> +code. +486 host is pretty well out of support currently. Can you reproduce +this on a less ancient host CPU type ? + +> +ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: +> +(cpu == current_cpu) +> +Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: +> +assertion failed: (cpu == current_cpu) +> +0x48874a != 0x3c69e10 +> +** +> +ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: +> +(cpu == current_cpu) +> +Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: +> +assertion failed: (cpu == current_cpu) +What compiler version do you build QEMU with? That +assert is there because we have seen some buggy compilers +in the past which don't correctly preserve the variable +value as the setjmp/longjmp spec requires them to. + +thanks +-- PMM + +Dne 27. 11. 
23 v 10:37 Peter Maydell napsal(a): +> +On Sat, 25 Nov 2023 at 13:09, Petr Cvek wrote: +> +> +> +> It seems there is a bug in SIGALRM handling when 486 system emulates x86_64 +> +> code. +> +> +486 host is pretty well out of support currently. Can you reproduce +> +this on a less ancient host CPU type ? +> +It seems it only fails when the code is compiled for i486. QEMU built with the +same compiler with -march=i586 and above runs on the same physical hardware +without a problem. All -march= variants were executed on ryzen 3600. + +> +> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion +> +> failed: (cpu == current_cpu) +> +> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: +> +> assertion failed: (cpu == current_cpu) +> +> 0x48874a != 0x3c69e10 +> +> ** +> +> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion +> +> failed: (cpu == current_cpu) +> +> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: +> +> assertion failed: (cpu == current_cpu) +> +> +What compiler version do you build QEMU with? That +> +assert is there because we have seen some buggy compilers +> +in the past which don't correctly preserve the variable +> +value as the setjmp/longjmp spec requires them to. +> +i486 and i586+ code variants were compiled with GCC 13.2.0 (more exactly, +slackware64 current multilib distribution). + +i486 binary which runs on the real 486 is also GCC 13.2.0 and installed as a +part of the buildroot crosscompiler (about two week old git snapshot). + +> +thanks +> +-- PMM +best regards, +Petr + +On 11/25/23 07:08, Petr Cvek wrote: +ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion failed: +(cpu == current_cpu) +Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion +failed: (cpu == current_cpu) +# + +The code fails either with or without -singlestep, the command line: + +/usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep /opt/x86_64/alarm.bin + +Source code of QEMU 8.1.1 was modified with patch "[PATCH] qemu/timer: Don't use +RDTSC on i486" [1], +with added few ioctls (not relevant) and cpu_exec_longjmp_cleanup() now prints +current pointers of +cpu and current_cpu (line "0x48874a != 0x3c69e10"). +If you try this again with 8.2-rc2, you should not see an assertion failure. +You should see instead + +QEMU internal SIGILL {code=ILLOPC, addr=0x12345678} +which I think more accurately summarizes the situation of attempting RDTSC on hardware +that does not support it. +r~ + +Dne 29. 11. 23 v 15:25 Richard Henderson napsal(a): +> +On 11/25/23 07:08, Petr Cvek wrote: +> +> ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: assertion +> +> failed: (cpu == current_cpu) +> +> Bail out! ERROR:../accel/tcg/cpu-exec.c:546:cpu_exec_longjmp_cleanup: +> +> assertion failed: (cpu == current_cpu) +> +> # +> +> +> +> The code fails either with or without -singlestep, the command line: +> +> +> +> /usr/bin/qemu-x86_64 -L /opt/x86_64 -strace -singlestep +> +> /opt/x86_64/alarm.bin +> +> +> +> Source code of QEMU 8.1.1 was modified with patch "[PATCH] qemu/timer: Don't +> +> use RDTSC on i486" [1], +> +> with added few ioctls (not relevant) and cpu_exec_longjmp_cleanup() now +> +> prints current pointers of +> +> cpu and current_cpu (line "0x48874a != 0x3c69e10"). +> +> +> +If you try this again with 8.2-rc2, you should not see an assertion failure. 
+> +You should see instead +> +> +QEMU internal SIGILL {code=ILLOPC, addr=0x12345678} +> +> +which I think more accurately summarizes the situation of attempting RDTSC on +> +hardware that does not support it. +> +> +Compilation of vanilla qemu v8.2.0-rc2 with -march=i486 by GCC 13.2.0 and +running the resulting binary on ryzen still leads to: + +** +ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion failed: +(cpu == current_cpu) +Bail out! ERROR:../accel/tcg/cpu-exec.c:533:cpu_exec_longjmp_cleanup: assertion +failed: (cpu == current_cpu) +Aborted + +> +> +r~ +Petr + diff --git a/classification_output/01/mistranslation/8720260 b/classification_output/01/mistranslation/8720260 deleted file mode 100644 index 32d247ac7..000000000 --- a/classification_output/01/mistranslation/8720260 +++ /dev/null @@ -1,344 +0,0 @@ -mistranslation: 0.752 -instruction: 0.700 -other: 0.683 -semantic: 0.669 - -[Bug Report][RFC PATCH 0/1] block: fix failing assert on paused VM migration - -There's a bug (failing assert) which is reproduced during migration of -a paused VM. I am able to reproduce it on a stand with 2 nodes and a common -NFS share, with VM's disk on that share. - -root@fedora40-1-vm:~# virsh domblklist alma8-vm - Target Source ------------------------------------------- - sda /mnt/shared/images/alma8.qcow2 - -root@fedora40-1-vm:~# df -Th /mnt/shared -Filesystem Type Size Used Avail Use% Mounted on -127.0.0.1:/srv/nfsd nfs4 63G 16G 48G 25% /mnt/shared - -On the 1st node: - -root@fedora40-1-vm:~# virsh start alma8-vm ; virsh suspend alma8-vm -root@fedora40-1-vm:~# virsh migrate --compressed --p2p --persistent ---undefinesource --live alma8-vm qemu+ssh://fedora40-2-vm/system - -Then on the 2nd node: - -root@fedora40-2-vm:~# virsh migrate --compressed --p2p --persistent ---undefinesource --live alma8-vm qemu+ssh://fedora40-1-vm/system -error: operation failed: domain is not running - -root@fedora40-2-vm:~# tail -3 /var/log/libvirt/qemu/alma8-vm.log -2024-09-19 13:53:33.336+0000: initiating migration -qemu-system-x86_64: ../block.c:6976: int -bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & -BDRV_O_INACTIVE)' failed. 
-2024-09-19 13:53:42.991+0000: shutting down, reason=crashed - -Backtrace: - -(gdb) bt -#0 0x00007f7eaa2f1664 in __pthread_kill_implementation () at /lib64/libc.so.6 -#1 0x00007f7eaa298c4e in raise () at /lib64/libc.so.6 -#2 0x00007f7eaa280902 in abort () at /lib64/libc.so.6 -#3 0x00007f7eaa28081e in __assert_fail_base.cold () at /lib64/libc.so.6 -#4 0x00007f7eaa290d87 in __assert_fail () at /lib64/libc.so.6 -#5 0x0000563c38b95eb8 in bdrv_inactivate_recurse (bs=0x563c3b6c60c0) at -../block.c:6976 -#6 0x0000563c38b95aeb in bdrv_inactivate_all () at ../block.c:7038 -#7 0x0000563c3884d354 in qemu_savevm_state_complete_precopy_non_iterable -(f=0x563c3b700c20, in_postcopy=false, inactivate_disks=true) - at ../migration/savevm.c:1571 -#8 0x0000563c3884dc1a in qemu_savevm_state_complete_precopy (f=0x563c3b700c20, -iterable_only=false, inactivate_disks=true) at ../migration/savevm.c:1631 -#9 0x0000563c3883a340 in migration_completion_precopy (s=0x563c3b4d51f0, -current_active_state=) at ../migration/migration.c:2780 -#10 migration_completion (s=0x563c3b4d51f0) at ../migration/migration.c:2844 -#11 migration_iteration_run (s=0x563c3b4d51f0) at ../migration/migration.c:3270 -#12 migration_thread (opaque=0x563c3b4d51f0) at ../migration/migration.c:3536 -#13 0x0000563c38dbcf14 in qemu_thread_start (args=0x563c3c2d5bf0) at -../util/qemu-thread-posix.c:541 -#14 0x00007f7eaa2ef6d7 in start_thread () at /lib64/libc.so.6 -#15 0x00007f7eaa373414 in clone () at /lib64/libc.so.6 - -What happens here is that after 1st migration BDS related to HDD remains -inactive as VM is still paused. Then when we initiate 2nd migration, -bdrv_inactivate_all() leads to the attempt to set BDRV_O_INACTIVE flag -on that node which is already set, thus assert fails. - -Attached patch which simply skips setting flag if it's already set is more -of a kludge than a clean solution. Should we use more sophisticated logic -which allows some of the nodes be in inactive state prior to the migration, -and takes them into account during bdrv_inactivate_all()? Comments would -be appreciated. - -Andrey - -Andrey Drobyshev (1): - block: do not fail when inactivating node which is inactive - - block.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - --- -2.39.3 - -Instead of throwing an assert let's just ignore that flag is already set -and return. We assume that it's going to be safe to ignore. Otherwise -this assert fails when migrating a paused VM back and forth. - -Ideally we'd like to have a more sophisticated solution, e.g. not even -scan the nodes which should be inactive at this point. - -Signed-off-by: Andrey Drobyshev ---- - block.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/block.c b/block.c -index 7d90007cae..c1dcf906d1 100644 ---- a/block.c -+++ b/block.c -@@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK -bdrv_inactivate_recurse(BlockDriverState *bs) - return 0; - } - -- assert(!(bs->open_flags & BDRV_O_INACTIVE)); -+ if (bs->open_flags & BDRV_O_INACTIVE) { -+ /* -+ * Return here instead of throwing assert as a workaround to -+ * prevent failure on migrating paused VM. -+ * Here we assume that if we're trying to inactivate BDS that's -+ * already inactive, it's safe to just ignore it. -+ */ -+ return 0; -+ } - - /* Inactivate this node */ - if (bs->drv->bdrv_inactivate) { --- -2.39.3 - -[add migration maintainers] - -On 24.09.24 15:56, Andrey Drobyshev wrote: -Instead of throwing an assert let's just ignore that flag is already set -and return. 
We assume that it's going to be safe to ignore. Otherwise -this assert fails when migrating a paused VM back and forth. - -Ideally we'd like to have a more sophisticated solution, e.g. not even -scan the nodes which should be inactive at this point. - -Signed-off-by: Andrey Drobyshev ---- - block.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/block.c b/block.c -index 7d90007cae..c1dcf906d1 100644 ---- a/block.c -+++ b/block.c -@@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK -bdrv_inactivate_recurse(BlockDriverState *bs) - return 0; - } -- assert(!(bs->open_flags & BDRV_O_INACTIVE)); -+ if (bs->open_flags & BDRV_O_INACTIVE) { -+ /* -+ * Return here instead of throwing assert as a workaround to -+ * prevent failure on migrating paused VM. -+ * Here we assume that if we're trying to inactivate BDS that's -+ * already inactive, it's safe to just ignore it. -+ */ -+ return 0; -+ } -/* Inactivate this node */ -if (bs->drv->bdrv_inactivate) { -I doubt that this a correct way to go. - -As far as I understand, "inactive" actually means that "storage is not belong to -qemu, but to someone else (another qemu process for example), and may be changed -transparently". In turn this means that Qemu should do nothing with inactive disks. So the -problem is that nobody called bdrv_activate_all on target, and we shouldn't ignore that. - -Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(), -but only in some scenarios. May be, the condition should be less strict here. - -Why we need any condition here at all? Don't we want to activate block-layer on -target after migration anyway? - --- -Best regards, -Vladimir - -On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote: -> -[add migration maintainers] -> -> -On 24.09.24 15:56, Andrey Drobyshev wrote: -> -> [...] -> -> -I doubt that this a correct way to go. -> -> -As far as I understand, "inactive" actually means that "storage is not -> -belong to qemu, but to someone else (another qemu process for example), -> -and may be changed transparently". In turn this means that Qemu should -> -do nothing with inactive disks. So the problem is that nobody called -> -bdrv_activate_all on target, and we shouldn't ignore that. -> -> -Hmm, I see in process_incoming_migration_bh() we do call -> -bdrv_activate_all(), but only in some scenarios. May be, the condition -> -should be less strict here. -> -> -Why we need any condition here at all? Don't we want to activate -> -block-layer on target after migration anyway? -> -Hmm I'm not sure about the unconditional activation, since we at least -have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it -in such a case). In current libvirt upstream I see such code: - -> -/* Migration capabilities which should always be enabled as long as they -> -> -* are supported by QEMU. If the capability is supposed to be enabled on both -> -> -* sides of migration, it won't be enabled unless both sides support it. -> -> -*/ -> -> -static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = -> -{ -> -> -{QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER, -> -> -QEMU_MIGRATION_SOURCE}, -> -> -> -> -{QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE, -> -> -QEMU_MIGRATION_DESTINATION}, -> -> -}; -which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set. 
- -The code from process_incoming_migration_bh() you're referring to: - -> -/* If capability late_block_activate is set: -> -> -* Only fire up the block code now if we're going to restart the -> -> -* VM, else 'cont' will do it. -> -> -* This causes file locking to happen; so we don't want it to happen -> -> -* unless we really are starting the VM. -> -> -*/ -> -> -if (!migrate_late_block_activate() || -> -> -(autostart && (!global_state_received() || -> -> -runstate_is_live(global_state_get_runstate())))) { -> -> -/* Make sure all file formats throw away their mutable metadata. -> -> -> -* If we get an error here, just don't restart the VM yet. */ -> -> -bdrv_activate_all(&local_err); -> -> -if (local_err) { -> -> -error_report_err(local_err); -> -> -local_err = NULL; -> -> -autostart = false; -> -> -} -> -> -} -It states explicitly that we're either going to start VM right at this -point if (autostart == true), or we wait till "cont" command happens. -None of this is going to happen if we start another migration while -still being in PAUSED state. So I think it seems reasonable to take -such case into account. For instance, this patch does prevent the crash: - -> -diff --git a/migration/migration.c b/migration/migration.c -> -index ae2be31557..3222f6745b 100644 -> ---- a/migration/migration.c -> -+++ b/migration/migration.c -> -@@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque) -> -*/ -> -if (!migrate_late_block_activate() || -> -(autostart && (!global_state_received() || -> -- runstate_is_live(global_state_get_runstate())))) { -> -+ runstate_is_live(global_state_get_runstate()))) || -> -+ (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) { -> -/* Make sure all file formats throw away their mutable metadata. -> -* If we get an error here, just don't restart the VM yet. */ -> -bdrv_activate_all(&local_err); -What are your thoughts on it? - -Andrey - diff --git a/classification_output/01/mistranslation/8874178 b/classification_output/01/mistranslation/8874178 deleted file mode 100644 index 1ebfe2889..000000000 --- a/classification_output/01/mistranslation/8874178 +++ /dev/null @@ -1,202 +0,0 @@ -mistranslation: 0.928 -other: 0.912 -instruction: 0.835 -semantic: 0.829 - -[Qemu-devel] [Bug?] Guest pause because VMPTRLD failed in KVM - -Hello, - - We encountered a problem that a guest paused because the KMOD report VMPTRLD -failed. 
- -The related information is as follows: - -1) Qemu command: - /usr/bin/qemu-kvm -name omu1 -S -machine pc-i440fx-2.3,accel=kvm,usb=off -cpu -host -m 15625 -realtime mlock=off -smp 8,sockets=1,cores=8,threads=1 -uuid -a2aacfff-6583-48b4-b6a4-e6830e519931 -no-user-config -nodefaults -chardev -socket,id=charmonitor,path=/var/lib/libvirt/qemu/omu1.monitor,server,nowait --mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown --boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -device -virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x5 -drive -file=/home/env/guest1.qcow2,if=none,id=drive-virtio-disk0,format=qcow2,cache=none,aio=native - -device -virtio-blk-pci,scsi=off,bus=pci.0,addr=0x6,drive=drive-virtio-disk0,id=virtio-disk0 - -drive -file=/home/env/guest_300G.img,if=none,id=drive-virtio-disk1,format=raw,cache=none,aio=native - -device -virtio-blk-pci,scsi=off,bus=pci.0,addr=0x7,drive=drive-virtio-disk1,id=virtio-disk1 - -netdev tap,fd=25,id=hostnet0,vhost=on,vhostfd=26 -device -virtio-net-pci,netdev=hostnet0,id=net0,mac=00:00:80:05:00:00,bus=pci.0,addr=0x3 --netdev tap,fd=27,id=hostnet1,vhost=on,vhostfd=28 -device -virtio-net-pci,netdev=hostnet1,id=net1,mac=00:00:80:05:00:01,bus=pci.0,addr=0x4 --chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 --device usb-tablet,id=input0 -vnc 0.0.0.0:0 -device -cirrus-vga,id=video0,vgamem_mb=16,bus=pci.0,addr=0x2 -device -virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x8 -msg timestamp=on - - 2) Qemu log: - KVM: entry failed, hardware error 0x4 - RAX=00000000ffffffed RBX=ffff8803fa2d7fd8 RCX=0100000000000000 -RDX=0000000000000000 - RSI=0000000000000000 RDI=0000000000000046 RBP=ffff8803fa2d7e90 -RSP=ffff8803fa2efe90 - R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000 -R11=000000000000b69a - R12=0000000000000001 R13=ffffffff81a25b40 R14=0000000000000000 -R15=ffff8803fa2d7fd8 - RIP=ffffffff81053e16 RFL=00000286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 - ES =0000 0000000000000000 ffffffff 00c00000 - CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA] - SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS [-WA] - DS =0000 0000000000000000 ffffffff 00c00000 - FS =0000 0000000000000000 ffffffff 00c00000 - GS =0000 ffff88040f540000 ffffffff 00c00000 - LDT=0000 0000000000000000 ffffffff 00c00000 - TR =0040 ffff88040f550a40 00002087 00008b00 DPL=0 TSS64-busy - GDT= ffff88040f549000 0000007f - IDT= ffffffffff529000 00000fff - CR0=80050033 CR2=00007f81ca0c5000 CR3=00000003f5081000 CR4=000407e0 - DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 -DR3=0000000000000000 - DR6=00000000ffff0ff0 DR7=0000000000000400 - EFER=0000000000000d01 - Code=?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? -?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? 
- - 3) Demsg - [347315.028339] kvm: vmptrld ffff8817ec5f0000/17ec5f0000 failed - klogd 1.4.1, ---------- state change ---------- - [347315.039506] kvm: vmptrld ffff8817ec5f0000/17ec5f0000 failed - [347315.051728] kvm: vmptrld ffff8817ec5f0000/17ec5f0000 failed - [347315.057472] vmwrite error: reg 6c0a value ffff88307e66e480 (err -2120672384) - [347315.064567] Pid: 69523, comm: qemu-kvm Tainted: GF X -3.0.93-0.8-default #1 - [347315.064569] Call Trace: - [347315.064587] [] dump_trace+0x75/0x300 - [347315.064595] [] dump_stack+0x69/0x6f - [347315.064617] [] vmx_vcpu_load+0x11e/0x1d0 [kvm_intel] - [347315.064647] [] kvm_arch_vcpu_load+0x44/0x1d0 [kvm] - [347315.064669] [] finish_task_switch+0x81/0xe0 - [347315.064676] [] thread_return+0x3b/0x2a7 - [347315.064687] [] kvm_vcpu_block+0x65/0xa0 [kvm] - [347315.064703] [] __vcpu_run+0xd1/0x260 [kvm] - [347315.064732] [] kvm_arch_vcpu_ioctl_run+0x68/0x1a0 -[kvm] - [347315.064759] [] kvm_vcpu_ioctl+0x38e/0x580 [kvm] - [347315.064771] [] do_vfs_ioctl+0x8b/0x3b0 - [347315.064776] [] sys_ioctl+0xa1/0xb0 - [347315.064783] [] system_call_fastpath+0x16/0x1b - [347315.064797] [<00007fee51969ce7>] 0x7fee51969ce6 - [347315.064799] vmwrite error: reg 6c0c value ffff88307e664000 (err -2120630272) - [347315.064802] Pid: 69523, comm: qemu-kvm Tainted: GF X -3.0.93-0.8-default #1 - [347315.064803] Call Trace: - [347315.064807] [] dump_trace+0x75/0x300 - [347315.064811] [] dump_stack+0x69/0x6f - [347315.064817] [] vmx_vcpu_load+0x12c/0x1d0 [kvm_intel] - [347315.064832] [] kvm_arch_vcpu_load+0x44/0x1d0 [kvm] - [347315.064851] [] finish_task_switch+0x81/0xe0 - [347315.064855] [] thread_return+0x3b/0x2a7 - [347315.064865] [] kvm_vcpu_block+0x65/0xa0 [kvm] - [347315.064880] [] __vcpu_run+0xd1/0x260 [kvm] - [347315.064907] [] kvm_arch_vcpu_ioctl_run+0x68/0x1a0 -[kvm] - [347315.064933] [] kvm_vcpu_ioctl+0x38e/0x580 [kvm] - [347315.064943] [] do_vfs_ioctl+0x8b/0x3b0 - [347315.064947] [] sys_ioctl+0xa1/0xb0 - [347315.064951] [] system_call_fastpath+0x16/0x1b - [347315.064957] [<00007fee51969ce7>] 0x7fee51969ce6 - [347315.064959] vmwrite error: reg 6c10 value 0 (err 0) - - 4) The isssue can't be reporduced. I search the Intel VMX sepc about reaseons -of vmptrld failure: - The instruction fails if its operand is not properly aligned, sets -unsupported physical-address bits, or is equal to the VMXON - pointer. In addition, the instruction fails if the 32 bits in memory -referenced by the operand do not match the VMCS - revision identifier supported by this processor. - - But I can't find any cues from the KVM source code. It seems each - error conditions is impossible in theory. :( - -Any suggestions will be appreciated! Paolo? - --- -Regards, --Gonglei - -On 10/11/2016 15:10, gong lei wrote: -> -4) The isssue can't be reporduced. I search the Intel VMX sepc about -> -reaseons -> -of vmptrld failure: -> -The instruction fails if its operand is not properly aligned, sets -> -unsupported physical-address bits, or is equal to the VMXON -> -pointer. In addition, the instruction fails if the 32 bits in memory -> -referenced by the operand do not match the VMCS -> -revision identifier supported by this processor. -> -> -But I can't find any cues from the KVM source code. It seems each -> -error conditions is impossible in theory. :( -Yes, it should not happen. :( - -If it's not reproducible, it's really hard to say what it was, except a -random memory corruption elsewhere or even a bit flip (!). 
- -Paolo - -On 2016/11/17 20:39, Paolo Bonzini wrote: -> -> -On 10/11/2016 15:10, gong lei wrote: -> -> 4) The isssue can't be reporduced. I search the Intel VMX sepc about -> -> reaseons -> -> of vmptrld failure: -> -> The instruction fails if its operand is not properly aligned, sets -> -> unsupported physical-address bits, or is equal to the VMXON -> -> pointer. In addition, the instruction fails if the 32 bits in memory -> -> referenced by the operand do not match the VMCS -> -> revision identifier supported by this processor. -> -> -> -> But I can't find any cues from the KVM source code. It seems each -> -> error conditions is impossible in theory. :( -> -Yes, it should not happen. :( -> -> -If it's not reproducible, it's really hard to say what it was, except a -> -random memory corruption elsewhere or even a bit flip (!). -> -> -Paolo -Thanks for your reply, Paolo :) - --- -Regards, --Gonglei - diff --git a/classification_output/01/other/0001467 b/classification_output/01/other/0001467 deleted file mode 100644 index ebd922167..000000000 --- a/classification_output/01/other/0001467 +++ /dev/null @@ -1,100 +0,0 @@ -other: 0.954 -mistranslation: 0.947 -semantic: 0.933 -instruction: 0.922 - -[BUG] Qemu abort with error "kvm_mem_ioeventfd_add: error adding ioeventfd: File exists (17)" - -Hi list, - -When I did some tests in my virtual domain with live-attached virtio deivces, I -got a coredump file of Qemu. - -The error print from qemu is "kvm_mem_ioeventfd_add: error adding ioeventfd: -File exists (17)". -And the call trace in the coredump file displays as below: -#0 0x0000ffff89acecc8 in ?? () from /usr/lib64/libc.so.6 -#1 0x0000ffff89a8acbc in raise () from /usr/lib64/libc.so.6 -#2 0x0000ffff89a78d2c in abort () from /usr/lib64/libc.so.6 -#3 0x0000aaaabd7ccf1c in kvm_mem_ioeventfd_add (listener=, -section=, match_data=, data=, -e=) at ../accel/kvm/kvm-all.c:1607 -#4 0x0000aaaabd6e0304 in address_space_add_del_ioeventfds (fds_old_nb=164, -fds_old=0xffff5c80a1d0, fds_new_nb=160, fds_new=0xffff5c565080, -as=0xaaaabdfa8810 ) - at ../softmmu/memory.c:795 -#5 address_space_update_ioeventfds (as=0xaaaabdfa8810 ) -at ../softmmu/memory.c:856 -#6 0x0000aaaabd6e24d8 in memory_region_commit () at ../softmmu/memory.c:1113 -#7 0x0000aaaabd6e25c4 in memory_region_transaction_commit () at -../softmmu/memory.c:1144 -#8 0x0000aaaabd394eb4 in pci_bridge_update_mappings -(br=br@entry=0xaaaae755f7c0) at ../hw/pci/pci_bridge.c:248 -#9 0x0000aaaabd394f4c in pci_bridge_write_config (d=0xaaaae755f7c0, -address=44, val=, len=4) at ../hw/pci/pci_bridge.c:272 -#10 0x0000aaaabd39a928 in rp_write_config (d=0xaaaae755f7c0, address=44, -val=128, len=4) at ../hw/pci-bridge/pcie_root_port.c:39 -#11 0x0000aaaabd6df328 in memory_region_write_accessor (mr=0xaaaae63898d0, -addr=65580, value=, size=4, shift=, -mask=, attrs=...) at ../softmmu/memory.c:494 -#12 0x0000aaaabd6dcb6c in access_with_adjusted_size (addr=addr@entry=65580, -value=value@entry=0xffff817adc78, size=size@entry=4, access_size_min=, access_size_max=, - access_fn=access_fn@entry=0xaaaabd6df284 , -mr=mr@entry=0xaaaae63898d0, attrs=attrs@entry=...) at ../softmmu/memory.c:556 -#13 0x0000aaaabd6e0dc8 in memory_region_dispatch_write -(mr=mr@entry=0xaaaae63898d0, addr=65580, data=, op=MO_32, -attrs=attrs@entry=...) 
at ../softmmu/memory.c:1534 -#14 0x0000aaaabd6d0574 in flatview_write_continue (fv=fv@entry=0xffff5c02da00, -addr=addr@entry=275146407980, attrs=attrs@entry=..., -ptr=ptr@entry=0xffff8aa8c028, len=len@entry=4, - addr1=, l=, mr=mr@entry=0xaaaae63898d0) at -/usr/src/debug/qemu-6.2.0-226.aarch64/include/qemu/host-utils.h:165 -#15 0x0000aaaabd6d4584 in flatview_write (len=4, buf=0xffff8aa8c028, attrs=..., -addr=275146407980, fv=0xffff5c02da00) at ../softmmu/physmem.c:3375 -#16 address_space_write (as=, addr=275146407980, attrs=..., -buf=buf@entry=0xffff8aa8c028, len=4) at ../softmmu/physmem.c:3467 -#17 0x0000aaaabd6d462c in address_space_rw (as=, addr=, attrs=..., attrs@entry=..., buf=buf@entry=0xffff8aa8c028, len=, is_write=) - at ../softmmu/physmem.c:3477 -#18 0x0000aaaabd7cf6e8 in kvm_cpu_exec (cpu=cpu@entry=0xaaaae625dfd0) at -../accel/kvm/kvm-all.c:2970 -#19 0x0000aaaabd7d09bc in kvm_vcpu_thread_fn (arg=arg@entry=0xaaaae625dfd0) at -../accel/kvm/kvm-accel-ops.c:49 -#20 0x0000aaaabd94ccd8 in qemu_thread_start (args=) at -../util/qemu-thread-posix.c:559 - - -By printing more info in the coredump file, I found that the addr of -fds_old[146] and fds_new[146] are same, but fds_old[146] belonged to a -live-attached virtio-scsi device while fds_new[146] was owned by another -live-attached virtio-net. -The reason why addr conflicted was then been found from vm's console log. Just -before qemu aborted, the guest kernel crashed and kdump.service booted the -dump-capture kernel where re-alloced address for the devices. -Because those virtio devices were live-attached after vm creating, different -addr may been assigned to them in the dump-capture kernel: - -the initial kernel booting log: -[ 1.663297] pci 0000:00:02.1: BAR 14: assigned [mem 0x11900000-0x11afffff] -[ 1.664560] pci 0000:00:02.1: BAR 15: assigned [mem -0x8001800000-0x80019fffff 64bit pref] - -the dump-capture kernel booting log: -[ 1.845211] pci 0000:00:02.0: BAR 14: assigned [mem 0x11900000-0x11bfffff] -[ 1.846542] pci 0000:00:02.0: BAR 15: assigned [mem -0x8001800000-0x8001afffff 64bit pref] - - -I think directly aborting the qemu process may not be the best choice in this -case cuz it will interrupt the work of kdump.service so that failed to generate -memory dump of the crashed guest kernel. -Perhaps, IMO, the error could be simply ignored in this case and just let kdump -to reboot the system after memory-dump finishing, but I failed to find a -suitable judgment in the codes. - -Any solution for this problem? Hope I can get some helps here. - -Hao - diff --git a/classification_output/01/other/02364653 b/classification_output/01/other/02364653 new file mode 100644 index 000000000..0142a9653 --- /dev/null +++ b/classification_output/01/other/02364653 @@ -0,0 +1,363 @@ +other: 0.956 +semantic: 0.942 +instruction: 0.927 +mistranslation: 0.912 + +[Qemu-devel] [BUG] Inappropriate size of target_sigset_t + +Hello, Peter, Laurent, + +While working on another problem yesterday, I think I discovered a +long-standing bug in QEMU Linux user mode: our target_sigset_t structure is +eight times smaller as it should be! + +In this code segment from syscalls_def.h: + +#ifdef TARGET_MIPS +#define TARGET_NSIG 128 +#else +#define TARGET_NSIG 64 +#endif +#define TARGET_NSIG_BPW TARGET_ABI_BITS +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) + +typedef struct { + abi_ulong sig[TARGET_NSIG_WORDS]; +} target_sigset_t; + +... 
TARGET_ABI_BITS should be replaced by eight times smaller constant (in +fact, semantically, we need TARGET_ABI_BYTES, but it is not defined) (what is +needed is actually "a byte per signal" in target_sigset_t, and we allow "a bit +per signal"). + +All this probably sounds to you like something impossible, since this code is +in QEMU "since forever", but I checked everything, and the bug seems real. I +wish you can prove me wrong. + +I just wanted to let you know about this, given the sensitive timing of current +softfreeze, and the fact that I won't be able to do more investigation on this +in coming weeks, since I am busy with other tasks, but perhaps you can analyze +and do something which you consider appropriate. + +Yours, +Aleksandar + +Le 03/07/2019 à 21:46, Aleksandar Markovic a écrit : +> +Hello, Peter, Laurent, +> +> +While working on another problem yesterday, I think I discovered a +> +long-standing bug in QEMU Linux user mode: our target_sigset_t structure is +> +eight times smaller as it should be! +> +> +In this code segment from syscalls_def.h: +> +> +#ifdef TARGET_MIPS +> +#define TARGET_NSIG 128 +> +#else +> +#define TARGET_NSIG 64 +> +#endif +> +#define TARGET_NSIG_BPW TARGET_ABI_BITS +> +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) +> +> +typedef struct { +> +abi_ulong sig[TARGET_NSIG_WORDS]; +> +} target_sigset_t; +> +> +... TARGET_ABI_BITS should be replaced by eight times smaller constant (in +> +fact, semantically, we need TARGET_ABI_BYTES, but it is not defined) (what is +> +needed is actually "a byte per signal" in target_sigset_t, and we allow "a +> +bit per signal"). +TARGET_NSIG is divided by TARGET_ABI_BITS which gives you the number of +abi_ulong words we need in target_sigset_t. + +> +All this probably sounds to you like something impossible, since this code is +> +in QEMU "since forever", but I checked everything, and the bug seems real. I +> +wish you can prove me wrong. +> +> +I just wanted to let you know about this, given the sensitive timing of +> +current softfreeze, and the fact that I won't be able to do more +> +investigation on this in coming weeks, since I am busy with other tasks, but +> +perhaps you can analyze and do something which you consider appropriate. +If I compare with kernel, it looks good: + +In Linux: + + arch/mips/include/uapi/asm/signal.h + + #define _NSIG 128 + #define _NSIG_BPW (sizeof(unsigned long) * 8) + #define _NSIG_WORDS (_NSIG / _NSIG_BPW) + + typedef struct { + unsigned long sig[_NSIG_WORDS]; + } sigset_t; + +_NSIG_BPW is 8 * 8 = 64 on MIPS64 or 4 * 8 = 32 on MIPS + +In QEMU: + +TARGET_NSIG_BPW is TARGET_ABI_BITS which is TARGET_LONG_BITS which is +64 on MIPS64 and 32 on MIPS. + +I think there is no problem. + +Thanks, +Laurent + +From: Laurent Vivier +> +If I compare with kernel, it looks good: +> +... +> +I think there is no problem. +Sure, thanks for such fast response - again, I am glad if you are right. +However, for some reason, glibc (and musl too) define sigset_t differently than +kernel. Please take a look. I am not sure if this is covered fine in our code. + +Yours, +Aleksandar + +> +Thanks, +> +Laurent + +On Wed, 3 Jul 2019 at 21:20, Aleksandar Markovic wrote: +> +> +From: Laurent Vivier +> +> If I compare with kernel, it looks good: +> +> ... +> +> I think there is no problem. +> +> +Sure, thanks for such fast response - again, I am glad if you are right. +> +However, for some reason, glibc (and musl too) define sigset_t differently +> +than kernel. Please take a look. 
I am not sure if this is covered fine in our +> +code. +Yeah, the libc definitions of sigset_t don't match the +kernel ones (this is for obscure historical reasons IIRC). +We're providing implementations of the target +syscall interface, so our target_sigset_t should be the +target kernel's version (and the target libc's version doesn't +matter to us). On the other hand we will be using the +host libc version, I think, so a little caution is required +and it's possible we have some bugs in our code. + +thanks +-- PMM + +> +From: Peter Maydell +> +> +On Wed, 3 Jul 2019 at 21:20, Aleksandar Markovic wrote: +> +> +> +> From: Laurent Vivier +> +> > If I compare with kernel, it looks good: +> +> > ... +> +> > I think there is no problem. +> +> +> +> Sure, thanks for such fast response - again, I am glad if you are right. +> +> However, for some reason, glibc (and musl too) define sigset_t differently +> +> than kernel. Please take a look. I am not sure if this is covered fine in +> +> our code. +> +> +Yeah, the libc definitions of sigset_t don't match the +> +kernel ones (this is for obscure historical reasons IIRC). +> +We're providing implementations of the target +> +syscall interface, so our target_sigset_t should be the +> +target kernel's version (and the target libc's version doesn't +> +matter to us). On the other hand we will be using the +> +host libc version, I think, so a little caution is required +> +and it's possible we have some bugs in our code. +OK, I gather than this is not something that requires our immediate attention +(for 4.1), but we can analyze it later on. + +Thanks for response!! + +Sincerely, +Aleksandar + +> +thanks +> +-- PMM + +Le 03/07/2019 à 22:28, Peter Maydell a écrit : +> +On Wed, 3 Jul 2019 at 21:20, Aleksandar Markovic wrote: +> +> +> +> From: Laurent Vivier +> +>> If I compare with kernel, it looks good: +> +>> ... +> +>> I think there is no problem. +> +> +> +> Sure, thanks for such fast response - again, I am glad if you are right. +> +> However, for some reason, glibc (and musl too) define sigset_t differently +> +> than kernel. Please take a look. I am not sure if this is covered fine in +> +> our code. +> +> +Yeah, the libc definitions of sigset_t don't match the +> +kernel ones (this is for obscure historical reasons IIRC). +> +We're providing implementations of the target +> +syscall interface, so our target_sigset_t should be the +> +target kernel's version (and the target libc's version doesn't +> +matter to us). On the other hand we will be using the +> +host libc version, I think, so a little caution is required +> +and it's possible we have some bugs in our code. +It's why we need host_to_target_sigset_internal() and +target_to_host_sigset_internal() that translates bits and bytes between +guest kernel interface and host libc interface. 
+ +void host_to_target_sigset_internal(target_sigset_t *d, + const sigset_t *s) +{ + int i; + target_sigemptyset(d); + for (i = 1; i <= TARGET_NSIG; i++) { + if (sigismember(s, i)) { + target_sigaddset(d, host_to_target_signal(i)); + } + } +} + +void target_to_host_sigset_internal(sigset_t *d, + const target_sigset_t *s) +{ + int i; + sigemptyset(d); + for (i = 1; i <= TARGET_NSIG; i++) { + if (target_sigismember(s, i)) { + sigaddset(d, target_to_host_signal(i)); + } + } +} + +Thanks, +Laurent + +Hi Aleksandar, + +On Wed, Jul 3, 2019 at 12:48 PM Aleksandar Markovic + wrote: +> +#define TARGET_NSIG_BPW TARGET_ABI_BITS +> +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) +> +> +typedef struct { +> +abi_ulong sig[TARGET_NSIG_WORDS]; +> +} target_sigset_t; +> +> +... TARGET_ABI_BITS should be replaced by eight times smaller constant (in +> +fact, +> +semantically, we need TARGET_ABI_BYTES, but it is not defined) (what is needed +> +is actually "a byte per signal" in target_sigset_t, and we allow "a bit per +> +signal"). +Why do we need a byte per target signal, if the functions in linux-user/signal.c +operate with bits? + +-- +Thanks. +-- Max + +> +Why do we need a byte per target signal, if the functions in +> +linux-user/signal.c +> +operate with bits? +Max, + +I did not base my findings on code analysis, but on dumping size/offsets of +elements of some structures, as they are emulated in QEMU, and in real systems. +So, I can't really answer your question. + +Yours, +Aleksandar + +> +-- +> +Thanks. +> +-- Max + diff --git a/classification_output/01/other/02572177 b/classification_output/01/other/02572177 new file mode 100644 index 000000000..55a82678b --- /dev/null +++ b/classification_output/01/other/02572177 @@ -0,0 +1,421 @@ +other: 0.869 +instruction: 0.794 +semantic: 0.770 +mistranslation: 0.693 + +[Qemu-devel] 答复: Re: [BUG]COLO failover hang + +hi. + + +I test the git qemu master have the same problem. 
+ + +(gdb) bt + + +#0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, niov=1, +fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 + + +#1 0x00007f658e4aa0c2 in qio_channel_read (address@hidden, address@hidden "", +address@hidden, address@hidden) at io/channel.c:114 + + +#2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +migration/qemu-file-channel.c:78 + + +#3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +migration/qemu-file.c:295 + + +#4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, address@hidden) at +migration/qemu-file.c:555 + + +#5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +migration/qemu-file.c:568 + + +#6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +migration/qemu-file.c:648 + + +#7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +address@hidden) at migration/colo.c:244 + + +#8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized out>, +address@hidden, address@hidden) + + + at migration/colo.c:264 + + +#9 0x00007f658e3e740e in colo_process_incoming_thread (opaque=0x7f658eb30360 +<mis_current.31286>) at migration/colo.c:577 + + +#10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 + + +#11 0x00007f65881983ed in clone () from /lib64/libc.so.6 + + +(gdb) p ioc->name + + +$2 = 0x7f658ff7d5c0 "migration-socket-incoming" + + +(gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN + + +$3 = 0 + + + + + +(gdb) bt + + +#0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, condition=G_IO_IN, +opaque=0x7fdcceeafa90) at migration/socket.c:137 + + +#1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at +gmain.c:3054 + + +#2 g_main_context_dispatch (context=<optimized out>, address@hidden) at +gmain.c:3630 + + +#3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 + + +#4 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:258 + + +#5 main_loop_wait (address@hidden) at util/main-loop.c:506 + + +#6 0x00007fdccb526187 in main_loop () at vl.c:1898 + + +#7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at +vl.c:4709 + + +(gdb) p ioc->features + + +$1 = 6 + + +(gdb) p ioc->name + + +$2 = 0x7fdcce1b1ab0 "migration-socket-listener" + + + + + +May be socket_accept_incoming_migration should call +qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? + + + + + +thank you. + + + + + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人:王广10165992 address@hidden +抄送人: address@hidden address@hidden +日 期 :2017å¹´03月16日 14:46 +主 题 :Re: [Qemu-devel] COLO failover hang + + + + + + + +On 03/15/2017 05:06 PM, wangguang wrote: +> am testing QEMU COLO feature described here [QEMU +> Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +> +> When the Primary Node panic,the Secondary Node qemu hang. +> hang at recvmsg in qio_channel_socket_readv. +> And I run { 'execute': 'nbd-server-stop' } and { "execute": +> "x-colo-lost-heartbeat" } in Secondary VM's +> monitor,the Secondary Node qemu still hang at recvmsg . +> +> I found that the colo in qemu is not complete yet. +> Do the colo have any plan for development? + +Yes, We are developing. You can see some of patch we pushing. + +> Has anyone ever run it successfully? Any help is appreciated! + +In our internal version can run it successfully, +The failover detail you can ask Zhanghailiang for help. 
+Next time if you have some question about COLO, +please cc me and zhanghailiang address@hidden + + +Thanks +Zhang Chen + + +> +> +> +> centos7.2+qemu2.7.50 +> (gdb) bt +> #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +> #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>, +> iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) at +> io/channel-socket.c:497 +> #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +> address@hidden "", address@hidden, +> address@hidden) at io/channel.c:97 +> #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +> buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +> migration/qemu-file-channel.c:78 +> #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +> migration/qemu-file.c:257 +> #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +> address@hidden) at migration/qemu-file.c:510 +> #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +> migration/qemu-file.c:523 +> #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +> migration/qemu-file.c:603 +> #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +> address@hidden) at migration/colo.c:215 +> #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +> checkpoint_request=<synthetic pointer>, f=<optimized out>) at +> migration/colo.c:546 +> #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +> migration/colo.c:649 +> #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +> #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +> +> +> +> +> +> -- +> View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +> Sent from the Developer mailing list archive at Nabble.com. +> +> +> +> + +-- +Thanks +Zhang Chen + +Hi,Wang. + +You can test this branch: +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +and please follow wiki ensure your own configuration correctly. +http://wiki.qemu-project.org/Features/COLO +Thanks + +Zhang Chen + + +On 03/21/2017 03:27 PM, address@hidden wrote: +hi. + +I test the git qemu master have the same problem. 
+ +(gdb) bt +#0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +#1 0x00007f658e4aa0c2 in qio_channel_read +(address@hidden, address@hidden "", +address@hidden, address@hidden) at io/channel.c:114 +#2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +migration/qemu-file-channel.c:78 +#3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +migration/qemu-file.c:295 +#4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +address@hidden) at migration/qemu-file.c:555 +#5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +migration/qemu-file.c:568 +#6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +migration/qemu-file.c:648 +#7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +address@hidden) at migration/colo.c:244 +#8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized +out>, address@hidden, +address@hidden) +at migration/colo.c:264 +#9 0x00007f658e3e740e in colo_process_incoming_thread +(opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 +#10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 + +#11 0x00007f65881983ed in clone () from /lib64/libc.so.6 + +(gdb) p ioc->name + +$2 = 0x7f658ff7d5c0 "migration-socket-incoming" + +(gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN + +$3 = 0 + + +(gdb) bt +#0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +#1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at +gmain.c:3054 +#2 g_main_context_dispatch (context=<optimized out>, +address@hidden) at gmain.c:3630 +#3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +#4 os_host_main_loop_wait (timeout=<optimized out>) at +util/main-loop.c:258 +#5 main_loop_wait (address@hidden) at +util/main-loop.c:506 +#6 0x00007fdccb526187 in main_loop () at vl.c:1898 +#7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized +out>) at vl.c:4709 +(gdb) p ioc->features + +$1 = 6 + +(gdb) p ioc->name + +$2 = 0x7fdcce1b1ab0 "migration-socket-listener" +May be socket_accept_incoming_migration should +call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +thank you. + + + + + +原始邮件 +address@hidden; +*收件人:*王广10165992;address@hidden; +address@hidden;address@hidden; +*日 期 :*2017å¹´03月16日 14:46 +*主 题 :**Re: [Qemu-devel] COLO failover hang* + + + + +On 03/15/2017 05:06 PM, wangguang wrote: +> am testing QEMU COLO feature described here [QEMU +> Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +> +> When the Primary Node panic,the Secondary Node qemu hang. +> hang at recvmsg in qio_channel_socket_readv. +> And I run { 'execute': 'nbd-server-stop' } and { "execute": +> "x-colo-lost-heartbeat" } in Secondary VM's +> monitor,the Secondary Node qemu still hang at recvmsg . +> +> I found that the colo in qemu is not complete yet. +> Do the colo have any plan for development? + +Yes, We are developing. You can see some of patch we pushing. + +> Has anyone ever run it successfully? Any help is appreciated! + +In our internal version can run it successfully, +The failover detail you can ask Zhanghailiang for help. 
+Next time if you have some question about COLO, +please cc me and zhanghailiang address@hidden + + +Thanks +Zhang Chen + + +> +> +> +> centos7.2+qemu2.7.50 +> (gdb) bt +> #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +> #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>, +> iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) at +> io/channel-socket.c:497 +> #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +> address@hidden "", address@hidden, +> address@hidden) at io/channel.c:97 +> #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +> buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +> migration/qemu-file-channel.c:78 +> #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +> migration/qemu-file.c:257 +> #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +> address@hidden) at migration/qemu-file.c:510 +> #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +> migration/qemu-file.c:523 +> #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +> migration/qemu-file.c:603 +> #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +> address@hidden) at migration/colo.c:215 +> #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +> checkpoint_request=<synthetic pointer>, f=<optimized out>) at +> migration/colo.c:546 +> #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +> migration/colo.c:649 +> #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +> #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +> +> +> +> +> +> -- +> View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +> Sent from the Developer mailing list archive at Nabble.com. +> +> +> +> + +-- +Thanks +Zhang Chen +-- +Thanks +Zhang Chen + diff --git a/classification_output/01/other/04472277 b/classification_output/01/other/04472277 new file mode 100644 index 000000000..95b28c963 --- /dev/null +++ b/classification_output/01/other/04472277 @@ -0,0 +1,576 @@ +other: 0.846 +instruction: 0.845 +mistranslation: 0.817 +semantic: 0.815 + +[BUG][KVM_SET_USER_MEMORY_REGION] KVM_SET_USER_MEMORY_REGION failed + +Hi all, +I start a VM in openstack, and openstack use libvirt to start qemu VM, but now log show this ERROR. +Is there any one know this? +The ERROR log from /var/log/libvirt/qemu/instance-0000000e.log +``` +2023-03-14T10:09:17.674114Z qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION failed, slot=4, start=0xfffffffffe000000, size=0x2000: Invalid argument +kvm_set_phys_mem: error registering slot: Invalid argument +2023-03-14 10:09:18.198+0000: shutting down, reason=crashed +``` +The xml file +``` +root@c1c2:~# cat /etc/libvirt/qemu/instance-0000000e.xml + + +  instance-0000000e +  ff91d2dc-69a1-43ef-abde-c9e4e9a0305b +  +    +      +      provider-instance +      2023-03-14 10:09:13 +      +        64 +        1 +        0 +        0 +        1 +      +      +        admin +        admin +      +      +      +        +          +        +      +    +  +  65536 +  65536 +  1 +  +    +      OpenStack Foundation +      OpenStack Nova +      25.1.0 +      ff91d2dc-69a1-43ef-abde-c9e4e9a0305b +      ff91d2dc-69a1-43ef-abde-c9e4e9a0305b +      Virtual Machine +    +  +  +    hvm +    +    +  +  +    +    +    +  +  +    +  +  +    +    +    +  +  destroy +  restart +  destroy +  +    /usr/bin/qemu-system-x86_64 +    +      +      +      +     
+  [remaining device definitions of the domain XML lost to markup stripping; not recoverable]