diff options
Diffstat (limited to 'hw/ppc/spapr_events.c')
| -rw-r--r-- | hw/ppc/spapr_events.c | 49 |
1 files changed, 38 insertions, 11 deletions
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index a4a540f43d..1069d0197b 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -833,11 +833,28 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) /* get rtas addr from fdt */ rtas_addr = spapr_get_rtas_addr(); if (!rtas_addr) { - qemu_system_guest_panicked(NULL); + if (!recovered) { + error_report( +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found."); + qemu_system_guest_panicked(NULL); + } else { + warn_report( +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. " +"Machine check recovered."); + } g_free(ext_elog); return; } + /* + * By taking the interlock, we assume that the MCE will be + * delivered to the guest. CAUTION: don't add anything that could + * prevent the MCE to be delivered after this line, otherwise the + * guest won't be able to release the interlock and ultimately + * hang/crash? + */ + spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, env->gpr[3]); cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + @@ -860,17 +877,13 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) Error *local_err = NULL; if (spapr->fwnmi_machine_check_addr == -1) { - /* - * This implies that we have hit a machine check either when the - * guest has not registered FWNMI (i.e., "ibm,nmi-register" not - * called) or between system reset and "ibm,nmi-register". - * Fall back to the old machine check behavior in such cases. - */ + /* Non-FWNMI case, deliver it like an architected CPU interrupt. */ cs->exception_index = POWERPC_EXCP_MCHECK; ppc_cpu_do_interrupt(cs); return; } + /* Wait for FWNMI interlock. */ while (spapr->fwnmi_machine_check_interlock != -1) { /* * Check whether the same CPU got machine check error @@ -878,12 +891,25 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) * that CPU called "ibm,nmi-interlock") */ if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) { - qemu_system_guest_panicked(NULL); + if (!recovered) { + error_report( +"FWNMI: Unable to deliver machine check to guest: nested machine check."); + qemu_system_guest_panicked(NULL); + } else { + warn_report( +"FWNMI: Unable to deliver machine check to guest: nested machine check. " +"Machine check recovered."); + } return; } qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond); - /* Meanwhile if the system is reset, then just return */ if (spapr->fwnmi_machine_check_addr == -1) { + /* + * If the machine was reset while waiting for the interlock, + * abort the delivery. The machine check applies to a context + * that no longer exists, so it wouldn't make sense to deliver + * it now. + */ return; } } @@ -894,12 +920,13 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) * We don't want to abort so we let the migration to continue. * In a rare case, the machine check handler will run on the target. * Though this is not preferable, it is better than aborting - * the migration or killing the VM. + * the migration or killing the VM. It is okay to call + * migrate_del_blocker on a blocker that was not added (which the + * nmi-interlock handler would do when it's called after this). */ warn_report("Received a fwnmi while migration was in progress"); } - spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; spapr_mce_dispatch_elog(cpu, recovered); } |