diff options
| author | Richard Henderson <richard.henderson@linaro.org> | 2025-10-06 08:14:03 -0700 |
|---|---|---|
| committer | Richard Henderson <richard.henderson@linaro.org> | 2025-10-06 08:14:03 -0700 |
| commit | 92a0dcbd751d771512b9dedd97e00553181b7699 (patch) | |
| tree | 9397cb6a11d637c3861e98e134925d550afbae68 /scripts/arm_processor_error.py | |
| parent | 5cbb72eef0bd78be04cc1abbfbf5a0e00d807a23 (diff) | |
| parent | e209d4d7a31b9f82925a2205e6e19e61a3facbe0 (diff) | |
| download | focaccia-qemu-92a0dcbd751d771512b9dedd97e00553181b7699.tar.gz focaccia-qemu-92a0dcbd751d771512b9dedd97e00553181b7699.zip | |
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pci,pc: features, fixes users can now control VM bit in smbios. vhost-user-device is now user-createable. intel_iommu now supports PRI virtio-net now supports GSO over UDP tunnel ghes now supports error injection amd iommu now supports dma remapping for vfio better error messages for virtio small fixes all over the place. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCgAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmji0s0PHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRpuH4H/09h70IqAWZGHIWKGmmGGtdKOj3g54KuI0Ss # mGECEsHvvBexOy670Qy8jdgXfaW4UuNui8BiOnJnGsBX8Y0dy+/yZori3KhkXkaY # D57Ap9agkpHem7Vw0zgNsAF2bzDdlzTiQ6ns5oDnSq8yt82onCb5WGkWTGkPs/jL # Gf8Jv+Ddcpt5SU4/hHPYC8pUhl7z4xPOOyl0Qp1GG21Pxf5v4sGFcWuGGB7UEPSQ # MjZeoM0rSnLDtNg18sGwD5RPLQs13TbtgsVwijI79c3w3rcSpPNhGR5OWkdRCIYF # 8A0Nhq0Yfo0ogTht7yt1QNPf/ktJkuoBuGVirvpDaix2tCBECes= # =Zvq/ # -----END PGP SIGNATURE----- # gpg: Signature made Sun 05 Oct 2025 01:19:25 PM PDT # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [unknown] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (75 commits) virtio: improve virtqueue mapping error messages pci: Fix wrong parameter passing to pci_device_get_iommu_bus_devfn() intel_iommu: Simplify caching mode check with VFIO device intel_iommu: Enable Enhanced Set Root Table Pointer Support (ESRTPS) vdpa-dev: add get_vhost() callback for vhost-vdpa device amd_iommu: HATDis/HATS=11 support intel-iommu: Move dma_translation to x86-iommu amd_iommu: Refactor amdvi_page_walk() to use common code for page walk amd_iommu: Do not assume passthrough translation when DTE[TV]=0 amd_iommu: Toggle address translation mode on devtab entry invalidation amd_iommu: Add dma-remap property to AMD vIOMMU device amd_iommu: Set all address spaces to use passthrough mode on reset amd_iommu: Toggle memory regions based on address translation mode amd_iommu: Invalidate address translations on INVALIDATE_IOMMU_ALL amd_iommu: Add replay callback amd_iommu: Unmap all address spaces under the AMD IOMMU on reset amd_iommu: Use iova_tree records to determine large page size on UNMAP amd_iommu: Sync shadow page tables on page invalidation amd_iommu: Add basic structure to support IOMMU notifier updates amd_iommu: Add a page walker to sync shadow page tables on invalidation ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'scripts/arm_processor_error.py')
| -rw-r--r-- | scripts/arm_processor_error.py | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/scripts/arm_processor_error.py b/scripts/arm_processor_error.py new file mode 100644 index 0000000000..73d069f070 --- /dev/null +++ b/scripts/arm_processor_error.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +# +# pylint: disable=C0301,C0114,R0903,R0912,R0913,R0914,R0915,W0511 +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Copyright (C) 2024-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> + +# TODO: current implementation has dummy defaults. +# +# For a better implementation, a QMP addition/call is needed to +# retrieve some data for ARM Processor Error injection: +# +# - ARM registers: power_state, mpidr. + +""" +Generate an ARM processor error CPER, compatible with +UEFI 2.9A Errata. + +Injecting such errors can be done using: + + $ ./scripts/ghes_inject.py arm + Error injected. + +Produces a simple CPER register, as detected on a Linux guest: + +[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 +[Hardware Error]: event severity: recoverable +[Hardware Error]: Error 0, type: recoverable +[Hardware Error]: section_type: ARM processor error +[Hardware Error]: MIDR: 0x0000000000000000 +[Hardware Error]: running state: 0x0 +[Hardware Error]: Power State Coordination Interface state: 0 +[Hardware Error]: Error info structure 0: +[Hardware Error]: num errors: 2 +[Hardware Error]: error_type: 0x02: cache error +[Hardware Error]: error_info: 0x000000000091000f +[Hardware Error]: transaction type: Data Access +[Hardware Error]: cache error, operation type: Data write +[Hardware Error]: cache level: 2 +[Hardware Error]: processor context not corrupted +[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error + +The ARM Processor Error message can be customized via command line +parameters. For instance: + + $ ./scripts/ghes_inject.py arm --mpidr 0x444 --running --affinity 1 \ + --error-info 12345678 --vendor 0x13,123,4,5,1 --ctx-array 0,1,2,3,4,5 \ + -t cache tlb bus micro-arch tlb,micro-arch + Error injected. + +Injects this error, as detected on a Linux guest: + +[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 +[Hardware Error]: event severity: recoverable +[Hardware Error]: Error 0, type: recoverable +[Hardware Error]: section_type: ARM processor error +[Hardware Error]: MIDR: 0x0000000000000000 +[Hardware Error]: Multiprocessor Affinity Register (MPIDR): 0x0000000000000000 +[Hardware Error]: error affinity level: 0 +[Hardware Error]: running state: 0x1 +[Hardware Error]: Power State Coordination Interface state: 0 +[Hardware Error]: Error info structure 0: +[Hardware Error]: num errors: 2 +[Hardware Error]: error_type: 0x02: cache error +[Hardware Error]: error_info: 0x0000000000bc614e +[Hardware Error]: cache level: 2 +[Hardware Error]: processor context not corrupted +[Hardware Error]: Error info structure 1: +[Hardware Error]: num errors: 2 +[Hardware Error]: error_type: 0x04: TLB error +[Hardware Error]: error_info: 0x000000000054007f +[Hardware Error]: transaction type: Instruction +[Hardware Error]: TLB error, operation type: Instruction fetch +[Hardware Error]: TLB level: 1 +[Hardware Error]: processor context not corrupted +[Hardware Error]: the error has not been corrected +[Hardware Error]: PC is imprecise +[Hardware Error]: Error info structure 2: +[Hardware Error]: num errors: 2 +[Hardware Error]: error_type: 0x08: bus error +[Hardware Error]: error_info: 0x00000080d6460fff +[Hardware Error]: transaction type: Generic +[Hardware Error]: bus error, operation type: Generic read (type of instruction or data request cannot be determined) +[Hardware Error]: affinity level at which the bus error occurred: 1 +[Hardware Error]: processor context corrupted +[Hardware Error]: the error has been corrected +[Hardware Error]: PC is imprecise +[Hardware Error]: Program execution can be restarted reliably at the PC associated with the error. +[Hardware Error]: participation type: Local processor observed +[Hardware Error]: request timed out +[Hardware Error]: address space: External Memory Access +[Hardware Error]: memory access attributes:0x20 +[Hardware Error]: access mode: secure +[Hardware Error]: Error info structure 3: +[Hardware Error]: num errors: 2 +[Hardware Error]: error_type: 0x10: micro-architectural error +[Hardware Error]: error_info: 0x0000000078da03ff +[Hardware Error]: Error info structure 4: +[Hardware Error]: num errors: 2 +[Hardware Error]: error_type: 0x14: TLB error|micro-architectural error +[Hardware Error]: Context info structure 0: +[Hardware Error]: register context type: AArch64 EL1 context registers +[Hardware Error]: 00000000: 00000000 00000000 +[Hardware Error]: Vendor specific error info has 5 bytes: +[Hardware Error]: 00000000: 13 7b 04 05 01 .{... +[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error +[Firmware Warn]: GHES: Unhandled processor error type 0x04: TLB error +[Firmware Warn]: GHES: Unhandled processor error type 0x08: bus error +[Firmware Warn]: GHES: Unhandled processor error type 0x10: micro-architectural error +[Firmware Warn]: GHES: Unhandled processor error type 0x14: TLB error|micro-architectural error +""" + +import argparse +import re + +from qmp_helper import qmp, util, cper_guid + + +class ArmProcessorEinj: + """ + Implements ARM Processor Error injection via GHES + """ + + DESC = """ + Generates an ARM processor error CPER, compatible with + UEFI 2.9A Errata. + """ + + ACPI_GHES_ARM_CPER_LENGTH = 40 + ACPI_GHES_ARM_CPER_PEI_LENGTH = 32 + + # Context types + CONTEXT_AARCH32_EL1 = 1 + CONTEXT_AARCH64_EL1 = 5 + CONTEXT_MISC_REG = 8 + + def __init__(self, subparsers): + """Initialize the error injection class and add subparser""" + + # Valid choice values + self.arm_valid_bits = { + "mpidr": util.bit(0), + "affinity": util.bit(1), + "running": util.bit(2), + "vendor": util.bit(3), + } + + self.pei_flags = { + "first": util.bit(0), + "last": util.bit(1), + "propagated": util.bit(2), + "overflow": util.bit(3), + } + + self.pei_error_types = { + "cache": util.bit(1), + "tlb": util.bit(2), + "bus": util.bit(3), + "micro-arch": util.bit(4), + } + + self.pei_valid_bits = { + "multiple-error": util.bit(0), + "flags": util.bit(1), + "error-info": util.bit(2), + "virt-addr": util.bit(3), + "phy-addr": util.bit(4), + } + + self.data = bytearray() + + parser = subparsers.add_parser("arm", description=self.DESC) + + arm_valid_bits = ",".join(self.arm_valid_bits.keys()) + flags = ",".join(self.pei_flags.keys()) + error_types = ",".join(self.pei_error_types.keys()) + pei_valid_bits = ",".join(self.pei_valid_bits.keys()) + + # UEFI N.16 ARM Validation bits + g_arm = parser.add_argument_group("ARM processor") + g_arm.add_argument("--arm", "--arm-valid", + help=f"ARM valid bits: {arm_valid_bits}") + g_arm.add_argument("-a", "--affinity", "--level", "--affinity-level", + type=lambda x: int(x, 0), + help="Affinity level (when multiple levels apply)") + g_arm.add_argument("-l", "--mpidr", type=lambda x: int(x, 0), + help="Multiprocessor Affinity Register") + g_arm.add_argument("-i", "--midr", type=lambda x: int(x, 0), + help="Main ID Register") + g_arm.add_argument("-r", "--running", + action=argparse.BooleanOptionalAction, + default=None, + help="Indicates if the processor is running or not") + g_arm.add_argument("--psci", "--psci-state", + type=lambda x: int(x, 0), + help="Power State Coordination Interface - PSCI state") + + # TODO: Add vendor-specific support + + # UEFI N.17 bitmaps (type and flags) + g_pei = parser.add_argument_group("ARM Processor Error Info (PEI)") + g_pei.add_argument("-t", "--type", nargs="+", + help=f"one or more error types: {error_types}") + g_pei.add_argument("-f", "--flags", nargs="*", + help=f"zero or more error flags: {flags}") + g_pei.add_argument("-V", "--pei-valid", "--error-valid", nargs="*", + help=f"zero or more PEI valid bits: {pei_valid_bits}") + + # UEFI N.17 Integer values + g_pei.add_argument("-m", "--multiple-error", nargs="+", + help="Number of errors: 0: Single error, 1: Multiple errors, 2-65535: Error count if known") + g_pei.add_argument("-e", "--error-info", nargs="+", + help="Error information (UEFI 2.10 tables N.18 to N.20)") + g_pei.add_argument("-p", "--physical-address", nargs="+", + help="Physical address") + g_pei.add_argument("-v", "--virtual-address", nargs="+", + help="Virtual address") + + # UEFI N.21 Context + g_ctx = parser.add_argument_group("Processor Context") + g_ctx.add_argument("--ctx-type", "--context-type", nargs="*", + help="Type of the context (0=ARM32 GPR, 5=ARM64 EL1, other values supported)") + g_ctx.add_argument("--ctx-size", "--context-size", nargs="*", + help="Minimal size of the context") + g_ctx.add_argument("--ctx-array", "--context-array", nargs="*", + help="Comma-separated arrays for each context") + + # Vendor-specific data + g_vendor = parser.add_argument_group("Vendor-specific data") + g_vendor.add_argument("--vendor", "--vendor-specific", nargs="+", + help="Vendor-specific byte arrays of data") + + # Add arguments for Generic Error Data + qmp.argparse(parser) + + parser.set_defaults(func=self.send_cper) + + def send_cper(self, args): + """Parse subcommand arguments and send a CPER via QMP""" + + qmp_cmd = qmp(args.host, args.port, args.debug) + + # Handle Generic Error Data arguments if any + qmp_cmd.set_args(args) + + is_cpu_type = re.compile(r"^([\w+]+\-)?arm\-cpu$") + cpus = qmp_cmd.search_qom("/machine/unattached/device", + "type", is_cpu_type) + + cper = {} + pei = {} + ctx = {} + vendor = {} + + arg = vars(args) + + # Handle global parameters + if args.arm: + arm_valid_init = False + cper["valid"] = util.get_choice(name="valid", + value=args.arm, + choices=self.arm_valid_bits, + suffixes=["-error", "-err"]) + else: + cper["valid"] = 0 + arm_valid_init = True + + if "running" in arg: + if args.running: + cper["running-state"] = util.bit(0) + else: + cper["running-state"] = 0 + else: + cper["running-state"] = 0 + + if arm_valid_init: + if args.affinity: + cper["valid"] |= self.arm_valid_bits["affinity"] + + if args.mpidr: + cper["valid"] |= self.arm_valid_bits["mpidr"] + + if "running-state" in cper: + cper["valid"] |= self.arm_valid_bits["running"] + + if args.psci: + cper["valid"] |= self.arm_valid_bits["running"] + + # Handle PEI + if not args.type: + args.type = ["cache-error"] + + util.get_mult_choices( + pei, + name="valid", + values=args.pei_valid, + choices=self.pei_valid_bits, + suffixes=["-valid", "--addr"], + ) + util.get_mult_choices( + pei, + name="type", + values=args.type, + choices=self.pei_error_types, + suffixes=["-error", "-err"], + ) + util.get_mult_choices( + pei, + name="flags", + values=args.flags, + choices=self.pei_flags, + suffixes=["-error", "-cap"], + ) + util.get_mult_int(pei, "error-info", args.error_info) + util.get_mult_int(pei, "multiple-error", args.multiple_error) + util.get_mult_int(pei, "phy-addr", args.physical_address) + util.get_mult_int(pei, "virt-addr", args.virtual_address) + + # Handle context + util.get_mult_int(ctx, "type", args.ctx_type, allow_zero=True) + util.get_mult_int(ctx, "minimal-size", args.ctx_size, allow_zero=True) + util.get_mult_array(ctx, "register", args.ctx_array, allow_zero=True) + + util.get_mult_array(vendor, "bytes", args.vendor, max_val=255) + + # Store PEI + pei_data = bytearray() + default_flags = self.pei_flags["first"] + default_flags |= self.pei_flags["last"] + + error_info_num = 0 + + for i, p in pei.items(): # pylint: disable=W0612 + error_info_num += 1 + + # UEFI 2.10 doesn't define how to encode error information + # when multiple types are raised. So, provide a default only + # if a single type is there + if "error-info" not in p: + if p["type"] == util.bit(1): + p["error-info"] = 0x0091000F + if p["type"] == util.bit(2): + p["error-info"] = 0x0054007F + if p["type"] == util.bit(3): + p["error-info"] = 0x80D6460FFF + if p["type"] == util.bit(4): + p["error-info"] = 0x78DA03FF + + if "valid" not in p: + p["valid"] = 0 + if "multiple-error" in p: + p["valid"] |= self.pei_valid_bits["multiple-error"] + + if "flags" in p: + p["valid"] |= self.pei_valid_bits["flags"] + + if "error-info" in p: + p["valid"] |= self.pei_valid_bits["error-info"] + + if "phy-addr" in p: + p["valid"] |= self.pei_valid_bits["phy-addr"] + + if "virt-addr" in p: + p["valid"] |= self.pei_valid_bits["virt-addr"] + + # Version + util.data_add(pei_data, 0, 1) + + util.data_add(pei_data, + self.ACPI_GHES_ARM_CPER_PEI_LENGTH, 1) + + util.data_add(pei_data, p["valid"], 2) + util.data_add(pei_data, p["type"], 1) + util.data_add(pei_data, p.get("multiple-error", 1), 2) + util.data_add(pei_data, p.get("flags", default_flags), 1) + util.data_add(pei_data, p.get("error-info", 0), 8) + util.data_add(pei_data, p.get("virt-addr", 0xDEADBEEF), 8) + util.data_add(pei_data, p.get("phy-addr", 0xABBA0BAD), 8) + + # Store Context + ctx_data = bytearray() + context_info_num = 0 + + if ctx: + ret = qmp_cmd.send_cmd("query-target", may_open=True) + + default_ctx = self.CONTEXT_MISC_REG + + if "arch" in ret: + if ret["arch"] == "aarch64": + default_ctx = self.CONTEXT_AARCH64_EL1 + elif ret["arch"] == "arm": + default_ctx = self.CONTEXT_AARCH32_EL1 + + for k in sorted(ctx.keys()): + context_info_num += 1 + + if "type" not in ctx[k]: + ctx[k]["type"] = default_ctx + + if "register" not in ctx[k]: + ctx[k]["register"] = [] + + reg_size = len(ctx[k]["register"]) + size = 0 + + if "minimal-size" in ctx: + size = ctx[k]["minimal-size"] + + size = max(size, reg_size) + + size = (size + 1) % 0xFFFE + + # Version + util.data_add(ctx_data, 0, 2) + + util.data_add(ctx_data, ctx[k]["type"], 2) + + util.data_add(ctx_data, 8 * size, 4) + + for r in ctx[k]["register"]: + util.data_add(ctx_data, r, 8) + + for i in range(reg_size, size): # pylint: disable=W0612 + util.data_add(ctx_data, 0, 8) + + # Vendor-specific bytes are not grouped + vendor_data = bytearray() + if vendor: + for k in sorted(vendor.keys()): + for b in vendor[k]["bytes"]: + util.data_add(vendor_data, b, 1) + + # Encode ARM Processor Error + data = bytearray() + + util.data_add(data, cper["valid"], 4) + + util.data_add(data, error_info_num, 2) + util.data_add(data, context_info_num, 2) + + # Calculate the length of the CPER data + cper_length = self.ACPI_GHES_ARM_CPER_LENGTH + cper_length += len(pei_data) + cper_length += len(vendor_data) + cper_length += len(ctx_data) + util.data_add(data, cper_length, 4) + + util.data_add(data, arg.get("affinity-level", 0), 1) + + # Reserved + util.data_add(data, 0, 3) + + if "midr-el1" not in arg: + if cpus: + cmd_arg = { + 'path': cpus[0], + 'property': "midr" + } + ret = qmp_cmd.send_cmd("qom-get", cmd_arg, may_open=True) + if isinstance(ret, int): + arg["midr-el1"] = ret + + util.data_add(data, arg.get("mpidr-el1", 0), 8) + util.data_add(data, arg.get("midr-el1", 0), 8) + util.data_add(data, cper["running-state"], 4) + util.data_add(data, arg.get("psci-state", 0), 4) + + # Add PEI + data.extend(pei_data) + data.extend(ctx_data) + data.extend(vendor_data) + + self.data = data + + qmp_cmd.send_cper(cper_guid.CPER_PROC_ARM, self.data) |