summary refs log tree commit diff stats
path: root/hw/i386
diff options
context:
space:
mode:
Diffstat (limited to 'hw/i386')
-rw-r--r--hw/i386/acpi-build.c112
-rw-r--r--hw/i386/kvm/apic.c12
-rw-r--r--hw/i386/pc.c82
-rw-r--r--hw/i386/pc_q35.c2
4 files changed, 141 insertions, 67 deletions
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index e9996549cc..93be96f89c 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -340,24 +340,38 @@ build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm,
 void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
                        CPUArchIdList *apic_ids, GArray *entry)
 {
-    int apic_id;
-    AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic);
-
-    apic_id = apic_ids->cpus[uid].arch_id;
-    apic->type = ACPI_APIC_PROCESSOR;
-    apic->length = sizeof(*apic);
-    apic->processor_id = uid;
-    apic->local_apic_id = apic_id;
-    if (apic_ids->cpus[uid].cpu != NULL) {
-        apic->flags = cpu_to_le32(1);
+    uint32_t apic_id = apic_ids->cpus[uid].arch_id;
+
+    /* ACPI spec says that LAPIC entry for non present
+     * CPU may be omitted from MADT or it must be marked
+     * as disabled. However omitting non present CPU from
+     * MADT breaks hotplug on linux. So possible CPUs
+     * should be put in MADT but kept disabled.
+     */
+    if (apic_id < 255) {
+        AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic);
+
+        apic->type = ACPI_APIC_PROCESSOR;
+        apic->length = sizeof(*apic);
+        apic->processor_id = uid;
+        apic->local_apic_id = apic_id;
+        if (apic_ids->cpus[uid].cpu != NULL) {
+            apic->flags = cpu_to_le32(1);
+        } else {
+            apic->flags = cpu_to_le32(0);
+        }
     } else {
-        /* ACPI spec says that LAPIC entry for non present
-         * CPU may be omitted from MADT or it must be marked
-         * as disabled. However omitting non present CPU from
-         * MADT breaks hotplug on linux. So possible CPUs
-         * should be put in MADT but kept disabled.
-         */
-        apic->flags = cpu_to_le32(0);
+        AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic);
+
+        apic->type = ACPI_APIC_LOCAL_X2APIC;
+        apic->length = sizeof(*apic);
+        apic->uid = cpu_to_le32(uid);
+        apic->x2apic_id = cpu_to_le32(apic_id);
+        if (apic_ids->cpus[uid].cpu != NULL) {
+            apic->flags = cpu_to_le32(1);
+        } else {
+            apic->flags = cpu_to_le32(0);
+        }
     }
 }
 
@@ -369,11 +383,11 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
     int madt_start = table_data->len;
     AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev);
     AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev);
+    bool x2apic_mode = false;
 
     AcpiMultipleApicTable *madt;
     AcpiMadtIoApic *io_apic;
     AcpiMadtIntsrcovr *intsrcovr;
-    AcpiMadtLocalNmi *local_nmi;
     int i;
 
     madt = acpi_data_push(table_data, sizeof *madt);
@@ -382,6 +396,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
 
     for (i = 0; i < apic_ids->len; i++) {
         adevc->madt_cpu(adev, i, apic_ids, table_data);
+        if (apic_ids->cpus[i].arch_id > 254) {
+            x2apic_mode = true;
+        }
     }
     g_free(apic_ids);
 
@@ -414,12 +431,25 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
         intsrcovr->flags  = cpu_to_le16(0xd); /* active high, level triggered */
     }
 
-    local_nmi = acpi_data_push(table_data, sizeof *local_nmi);
-    local_nmi->type         = ACPI_APIC_LOCAL_NMI;
-    local_nmi->length       = sizeof(*local_nmi);
-    local_nmi->processor_id = 0xff; /* all processors */
-    local_nmi->flags        = cpu_to_le16(0);
-    local_nmi->lint         = 1; /* ACPI_LINT1 */
+    if (x2apic_mode) {
+        AcpiMadtLocalX2ApicNmi *local_nmi;
+
+        local_nmi = acpi_data_push(table_data, sizeof *local_nmi);
+        local_nmi->type   = ACPI_APIC_LOCAL_X2APIC_NMI;
+        local_nmi->length = sizeof(*local_nmi);
+        local_nmi->uid    = 0xFFFFFFFF; /* all processors */
+        local_nmi->flags  = cpu_to_le16(0);
+        local_nmi->lint   = 1; /* ACPI_LINT1 */
+    } else {
+        AcpiMadtLocalNmi *local_nmi;
+
+        local_nmi = acpi_data_push(table_data, sizeof *local_nmi);
+        local_nmi->type         = ACPI_APIC_LOCAL_NMI;
+        local_nmi->length       = sizeof(*local_nmi);
+        local_nmi->processor_id = 0xff; /* all processors */
+        local_nmi->flags        = cpu_to_le16(0);
+        local_nmi->lint         = 1; /* ACPI_LINT1 */
+    }
 
     build_header(linker, table_data,
                  (void *)(table_data->data + madt_start), "APIC",
@@ -2391,7 +2421,6 @@ static void
 build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 {
     AcpiSystemResourceAffinityTable *srat;
-    AcpiSratProcessorAffinity *core;
     AcpiSratMemoryAffinity *numamem;
 
     int i;
@@ -2411,18 +2440,33 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 
     for (i = 0; i < apic_ids->len; i++) {
         int j = numa_get_node_for_cpu(i);
-        int apic_id = apic_ids->cpus[i].arch_id;
+        uint32_t apic_id = apic_ids->cpus[i].arch_id;
 
-        core = acpi_data_push(table_data, sizeof *core);
-        core->type = ACPI_SRAT_PROCESSOR_APIC;
-        core->length = sizeof(*core);
-        core->local_apic_id = apic_id;
-        if (j < nb_numa_nodes) {
+        if (apic_id < 255) {
+            AcpiSratProcessorAffinity *core;
+
+            core = acpi_data_push(table_data, sizeof *core);
+            core->type = ACPI_SRAT_PROCESSOR_APIC;
+            core->length = sizeof(*core);
+            core->local_apic_id = apic_id;
+            if (j < nb_numa_nodes) {
                 core->proximity_lo = j;
+            }
+            memset(core->proximity_hi, 0, 3);
+            core->local_sapic_eid = 0;
+            core->flags = cpu_to_le32(1);
+        } else {
+            AcpiSratProcessorX2ApicAffinity *core;
+
+            core = acpi_data_push(table_data, sizeof *core);
+            core->type = ACPI_SRAT_PROCESSOR_x2APIC;
+            core->length = sizeof(*core);
+            core->x2apic_id = cpu_to_le32(apic_id);
+            if (j < nb_numa_nodes) {
+                core->proximity_domain = cpu_to_le32(j);
+            }
+            core->flags = cpu_to_le32(1);
         }
-        memset(core->proximity_hi, 0, 3);
-        core->local_sapic_eid = 0;
-        core->flags = cpu_to_le32(1);
     }
 
 
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index be55102c00..39b73e7b3d 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -34,7 +34,11 @@ static void kvm_put_apic_state(APICCommonState *s, struct kvm_lapic_state *kapic
     int i;
 
     memset(kapic, 0, sizeof(*kapic));
-    kvm_apic_set_reg(kapic, 0x2, s->id << 24);
+    if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) {
+        kvm_apic_set_reg(kapic, 0x2, s->initial_apic_id);
+    } else {
+        kvm_apic_set_reg(kapic, 0x2, s->id << 24);
+    }
     kvm_apic_set_reg(kapic, 0x8, s->tpr);
     kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24);
     kvm_apic_set_reg(kapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
@@ -59,7 +63,11 @@ void kvm_get_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic)
     APICCommonState *s = APIC_COMMON(dev);
     int i, v;
 
-    s->id = kvm_apic_get_reg(kapic, 0x2) >> 24;
+    if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) {
+        assert(kvm_apic_get_reg(kapic, 0x2) == s->initial_apic_id);
+    } else {
+        s->id = kvm_apic_get_reg(kapic, 0x2) >> 24;
+    }
     s->tpr = kvm_apic_get_reg(kapic, 0x8);
     s->arb_id = kvm_apic_get_reg(kapic, 0x9);
     s->log_dest = kvm_apic_get_reg(kapic, 0xd) >> 24;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index fb8f29c34c..f56ea0f87b 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -68,6 +68,7 @@
 #include "qapi-visit.h"
 #include "qom/cpu.h"
 #include "hw/nmi.h"
+#include "hw/i386/intel_iommu.h"
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -746,17 +747,15 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
 
     /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
      *
-     * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
-     * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
-     * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
-     * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
-     * may see".
+     * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS for
+     * building MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT table,
+     * that tables are based on xAPIC ID and QEMU<->SeaBIOS interface
+     * for CPU hotplug also uses APIC ID and not "CPU index".
+     * This means that FW_CFG_MAX_CPUS is not the "maximum number of CPUs",
+     * but the "limit to the APIC ID values SeaBIOS may see".
      *
-     * So, this means we must not use max_cpus, here, but the maximum possible
-     * APIC ID value, plus one.
-     *
-     * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
-     *     the APIC ID, not the "CPU index"
+     * So for compatibility reasons with old BIOSes we are stuck with
+     * "etc/max-cpus" actually being apic_id_limit
      */
     fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
@@ -1087,17 +1086,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
     }
 }
 
-static int pc_present_cpus_count(PCMachineState *pcms)
-{
-    int i, boot_cpus = 0;
-    for (i = 0; i < pcms->possible_cpus->len; i++) {
-        if (pcms->possible_cpus->cpus[i].cpu) {
-            boot_cpus++;
-        }
-    }
-    return boot_cpus;
-}
-
 static X86CPU *pc_new_cpu(const char *typename, int64_t apic_id,
                           Error **errp)
 {
@@ -1190,12 +1178,6 @@ void pc_cpus_init(PCMachineState *pcms)
      * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
      */
     pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
-    if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
-        error_report("max_cpus is too large. APIC ID of last CPU is %u",
-                     pcms->apic_id_limit - 1);
-        exit(1);
-    }
-
     pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
                                     sizeof(CPUArchId) * max_cpus);
     for (i = 0; i < max_cpus; i++) {
@@ -1240,6 +1222,19 @@ static void pc_build_feature_control_file(PCMachineState *pcms)
     fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
 }
 
+static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count)
+{
+    if (cpus_count > 0xff) {
+        /* If the number of CPUs can't be represented in 8 bits, the
+         * BIOS must use "etc/boot-cpus". Set RTC field to 0 just
+         * to make old BIOSes fail more predictably.
+         */
+        rtc_set_memory(rtc, 0x5f, 0);
+    } else {
+        rtc_set_memory(rtc, 0x5f, cpus_count - 1);
+    }
+}
+
 static
 void pc_machine_done(Notifier *notifier, void *data)
 {
@@ -1248,7 +1243,7 @@ void pc_machine_done(Notifier *notifier, void *data)
     PCIBus *bus = pcms->bus;
 
     /* set the number of CPUs */
-    rtc_set_memory(pcms->rtc, 0x5f, pc_present_cpus_count(pcms) - 1);
+    rtc_set_cpus_count(pcms->rtc, le16_to_cpu(pcms->boot_cpus_le));
 
     if (bus) {
         int extra_hosts = 0;
@@ -1269,8 +1264,28 @@ void pc_machine_done(Notifier *notifier, void *data)
 
     acpi_setup();
     if (pcms->fw_cfg) {
+        MachineClass *mc = MACHINE_GET_CLASS(pcms);
+
         pc_build_smbios(pcms->fw_cfg);
         pc_build_feature_control_file(pcms);
+
+        if (mc->max_cpus > 255) {
+            fw_cfg_add_file(pcms->fw_cfg, "etc/boot-cpus", &pcms->boot_cpus_le,
+                            sizeof(pcms->boot_cpus_le));
+        }
+    }
+
+    if (pcms->apic_id_limit > 255) {
+        IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+
+        if (!iommu || !iommu->x86_iommu.intr_supported ||
+            iommu->intr_eim != ON_OFF_AUTO_ON) {
+            error_report("current -smp configuration requires "
+                         "Extended Interrupt Mode enabled. "
+                         "You can add an IOMMU using: "
+                         "-device intel-iommu,intremap=on,eim=on");
+            exit(EXIT_FAILURE);
+        }
     }
 }
 
@@ -1794,9 +1809,11 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev,
         }
     }
 
+    /* increment the number of CPUs */
+    pcms->boot_cpus_le = cpu_to_le16(le16_to_cpu(pcms->boot_cpus_le) + 1);
     if (dev->hotplugged) {
-        /* increment the number of CPUs */
-        rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1);
+        /* Update the number of CPUs in CMOS */
+        rtc_set_cpus_count(pcms->rtc, le16_to_cpu(pcms->boot_cpus_le));
     }
 
     found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL);
@@ -1850,7 +1867,10 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev,
     found_cpu->cpu = NULL;
     object_unparent(OBJECT(dev));
 
-    rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) - 1);
+    /* decrement the number of CPUs */
+    pcms->boot_cpus_le = cpu_to_le16(le16_to_cpu(pcms->boot_cpus_le) - 1);
+    /* Update the number of CPUs in CMOS */
+    rtc_set_cpus_count(pcms->rtc, le16_to_cpu(pcms->boot_cpus_le));
  out:
     error_propagate(errp, local_err);
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 0b214f24c9..b40d19ee00 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -291,6 +291,7 @@ static void pc_q35_machine_options(MachineClass *m)
     m->default_display = "std";
     m->no_floppy = 1;
     m->has_dynamic_sysbus = true;
+    m->max_cpus = 288;
 }
 
 static void pc_q35_2_8_machine_options(MachineClass *m)
@@ -306,6 +307,7 @@ static void pc_q35_2_7_machine_options(MachineClass *m)
 {
     pc_q35_2_8_machine_options(m);
     m->alias = NULL;
+    m->max_cpus = 255;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_7);
 }