diff options
| author | Richard Henderson <richard.henderson@linaro.org> | 2025-08-31 07:37:34 +1000 |
|---|---|---|
| committer | Richard Henderson <richard.henderson@linaro.org> | 2025-08-31 07:37:35 +1000 |
| commit | e101d33792530093fa0b0a6e5f43e4d8cfe4581e (patch) | |
| tree | e7ce5e8e229180d684045b887797ec3e85d2f077 /scripts/lib/kdoc/kdoc_re.py | |
| parent | 4791f22a5f5571cb248b1eddff98630545b3fd3e (diff) | |
| parent | 2e27650bddd35477d994a795a3b1cb57c8ed5c76 (diff) | |
| download | focaccia-qemu-e101d33792530093fa0b0a6e5f43e4d8cfe4581e.tar.gz focaccia-qemu-e101d33792530093fa0b0a6e5f43e4d8cfe4581e.zip | |
Merge tag 'pull-target-arm-20250830' of https://gitlab.com/pm215/qemu into staging
target-arm queue:
* Implement FEAT_SCTLR2
* Implement FEAT_TCR2
* Implement FEAT_CSSC
* Implement FEAT_LSE128
* Clean up of register field definitions
* Trap PMCR when MDCR_EL2.TPMCR is set
* tests/functional: update aarch64 RME test images
* hw/intc/arm_gicv3_kvm: preserve pending interrupts during cpr
* hw/arm: add static NVDIMMs in device tree
* hw/arm/stm32f205_soc: Don't leak TYPE_OR_IRQ objects
* scripts/kernel-doc: Avoid new Perl precedence warning
* scripts/kernel-doc: Update to kernel's new Python implementation
# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmizIcAZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3tmCD/9Pe4Evw/I2e3Nqr4X87+KC
# JtX3s9U8Gly1ttnWd5a+fRubBqIvpxRsJYf0PJQi7otPGDq4E3TZ5UCRnInArpRh
# hJqyNxi2ELgDU0Z917UYMnxBwpv7+V/635V1/svSOWDf9RPHnf6GwrmlCvu4Llgf
# mVtDlQd+Ta5hoICM0VzrMZfTYevxGqi/cr/oVzCObKmh1YMpPTtSNlfYPMFcY7py
# JLu5e7YNN2krh19nCXieS3iqXMsFoLp31kXcCmKE1BgIKeVPNxTRMfOWa4uNDtUN
# 17iLfHLatNfSWUA1gvUHxv2maCdm4xJZdGowP/uYvzaemquFSjfM/8qaBxxFqZ1v
# 7jdZEzdnn1CX4Kmu3cPvhcuACyYRprlrKZYvCrTH4yCKbJsm0Uo7M66ia3EIF5EQ
# kehnGGwu3rv3qrliTXiXoAr7fC0OOiN0afAkS6a5lAi13s6M+Se2VElnRvIoXR2W
# 0Xw21/05p/WuXLoMNFjEpAaQgWYEc0kQhFAQczcZH+pyGlaU2QxCTTnaeuHUWcke
# y7OtpVBk4Fukaqd4gn0SQtYQLxeFq6vPOL4b1VKR5FuGDSucBUjuVl0dG4gkdbII
# yvCBaTb+IEY4fJ1E8IMTI3Lcydv9yblLyGXLr42e22x/l51SCZs1WvIx2i6u6VST
# lYnoOObEknvf25YAu3rDTw==
# =VItP
# -----END PGP SIGNATURE-----
# gpg: Signature made Sun 31 Aug 2025 02:07:28 AM AEST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [unknown]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [unknown]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [unknown]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* tag 'pull-target-arm-20250830' of https://gitlab.com/pm215/qemu: (32 commits)
hw/arm/stm32f205_soc: Don't leak TYPE_OR_IRQ objects
target/arm: Enable FEAT_LSE128 for -cpu max
target/arm: Implement FEAT_LSE128
target/arm: Rename isar_feature_aa64_atomics
tcg: Add tcg_gen_atomic_{xchg,fetch_and,fetch_or}_i128
accel/tcg: Add cpu_atomic_*_mmu for 16-byte xchg, fetch_and, fetch_or
qemu/atomic: Add atomic16 primitives for xchg, fetch_and, fetch_or
qemu/atomic: Finish renaming atomic128-cas.h headers
target/arm: Correct condition of aa64_atomics feature function
MAINTAINERS: Put kernel-doc under the "docs build machinery" section
scripts/kernel-doc: Delete the old Perl kernel-doc script
scripts/kerneldoc: Switch to the Python kernel-doc script
scripts/kernel-doc: tweak for QEMU coding standards
scripts/kernel-doc: strip QEMU_ from function definitions
scripts: Import Python kerneldoc from Linux kernel
tests/qtest/libqtest.h: Remove stray space from doc comment
docs/sphinx/kerneldoc.py: Handle new LINENO syntax
scripts/kernel-doc: Avoid new Perl precedence warning
hw/arm: add static NVDIMMs in device tree
target/arm: Enable FEAT_CSSC for -cpu max
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to '')
| -rw-r--r-- | scripts/lib/kdoc/kdoc_re.py | 270 |
1 files changed, 270 insertions, 0 deletions
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.

"""
Regular expression ancillary classes.

Those help caching regular expressions and do matching for kernel-doc.
"""

import re

# Local cache for compiled regular expressions, keyed by (pattern, flags)
re_cache = {}


class KernRe:
    """
    Helper class to simplify regex declaration and usage.

    It calls re.compile for a given pattern. It also allows adding
    regular expressions and define sub at class init time.

    Regular expressions can be cached via an argument, helping to speedup
    searches.
    """

    def _add_regex(self, string, flags):
        """
        Compile ``string`` with ``flags`` or reuse it from the cache.

        The cache key includes the flags, so the same pattern requested
        with different flags never receives a regex compiled with the
        wrong ones. The compiled regex is stored in the cache only when
        ``self.cache`` is true.
        """
        key = (string, flags)

        self.regex = re_cache.get(key)
        if self.regex is None:
            self.regex = re.compile(string, flags=flags)
            if self.cache:
                re_cache[key] = self.regex

    def __init__(self, string, cache=True, flags=0):
        """
        Compile a regular expression and initialize internal vars.

        ``string`` is the pattern, ``cache`` tells whether the compiled
        regex is stored in the module-level cache and ``flags`` is passed
        to re.compile.
        """

        self.cache = cache
        self.last_match = None

        self._add_regex(string, flags)

    def __str__(self):
        """
        Return the regular expression pattern.
        """
        return self.regex.pattern

    def __add__(self, other):
        """
        Allows adding two regular expressions into one.

        The patterns are concatenated and the flags of both operands are
        OR-ed. The result is cached if either operand is cached. ``other``
        must provide ``cache`` and ``regex`` attributes (e.g. a KernRe).
        """

        return KernRe(str(self) + str(other), cache=self.cache or other.cache,
                      flags=self.regex.flags | other.regex.flags)

    def match(self, string):
        """
        Handles a re.match storing its results
        """

        self.last_match = self.regex.match(string)
        return self.last_match

    def search(self, string):
        """
        Handles a re.search storing its results
        """

        self.last_match = self.regex.search(string)
        return self.last_match

    def findall(self, string):
        """
        Alias to re.findall
        """

        return self.regex.findall(string)

    def split(self, string):
        """
        Alias to re.split
        """

        return self.regex.split(string)

    def sub(self, sub, string, count=0):
        """
        Alias to re.sub
        """

        return self.regex.sub(sub, string, count=count)

    def group(self, num):
        """
        Returns the group results of the last match

        It uses the result stored by the last match() or search() call,
        so one of them must have succeeded before calling it.
        """

        return self.last_match.group(num)


class NestedMatch:
    """
    Finding nested delimiters is hard with regular expressions. It is
    even harder on Python with its normal re module, as there are several
    advanced regular expressions that are missing.

    This is the case of this pattern:

            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'

    which is used to properly match open/close parenthesis of the
    string search STRUCT_GROUP(),

    Add a class that counts pairs of delimiters, using it to match and
    replace nested expressions.

    The original approach was suggested by:
        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex

    Although I re-implemented it to make it more generic and match 3 types
    of delimiters. The logic checks if delimiters are paired. If not, it
    will ignore the search string.
    """

    # TODO: make NestedMatch handle multiple match groups
    #
    # Right now, regular expressions to match it are defined only up to
    #       the start delimiter, e.g.:
    #
    #       \bSTRUCT_GROUP\(
    #
    # is similar to: STRUCT_GROUP\((.*)\)
    # except that the content inside the match group is delimiter's aligned.
    #
    # The content inside parenthesis are converted into a single replace
    # group (e.g. r`\1').
    #
    # It would be nice to change such definition to support multiple
    # match groups, allowing a regex equivalent to.
    #
    #   FOO\((.*), (.*), (.*)\)
    #
    # it is probably easier to define it not as a regular expression, but
    # with some lexical definition like:
    #
    #   FOO(arg1, arg2, arg3)

    # Maps each start delimiter to its matching end delimiter
    DELIMITER_PAIRS = {
        '{': '}',
        '(': ')',
        '[': ']',
    }

    # Matches any single start or end delimiter
    RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')

    def _search(self, regex, line):
        """
        Finds paired blocks for a regex that ends with a delimiter.

        The suggestion of using finditer to match pairs came from:
        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
        but I ended using a different implementation to align all three types
        of delimiters and seek for an initial regular expression.

        The algorithm seeks for open/close paired delimiters and place them
        into a stack, yielding a start/stop position of each match when the
        stack is zeroed.

        Each yielded tuple is (start, offset, end), where start is the
        position where the regex match begins, offset is the position just
        after the start delimiter and end is the position just after the
        paired end delimiter.

        The algorithm should work fine for properly paired lines, but will
        silently ignore end delimiters that precede a start delimiter.
        This should be OK for kernel-doc parser, as unaligned delimiters
        would cause compilation errors. So, we don't need to raise exceptions
        to cover such issues.
        """

        # The stack is shared by all regex matches on the line: a match
        # whose delimiters are never closed leaves its entries here.
        stack = []

        for match_re in regex.finditer(line):
            start = match_re.start()
            offset = match_re.end()

            # The last character matched by the regex must be a start
            # delimiter; otherwise there is nothing to pair
            d = line[offset - 1]
            if d not in self.DELIMITER_PAIRS:
                continue

            end = self.DELIMITER_PAIRS[d]
            stack.append(end)

            for match in self.RE_DELIM.finditer(line[offset:]):
                pos = match.start() + offset

                d = line[pos]

                if d in self.DELIMITER_PAIRS:
                    end = self.DELIMITER_PAIRS[d]

                    stack.append(end)
                    continue

                # Does the end delimiter match what it is expected?
                if stack and d == stack[-1]:
                    stack.pop()

                    if not stack:
                        yield start, offset, pos + 1
                        break

    def search(self, regex, line):
        """
        This is similar to re.search:

        It matches a regex that it is followed by a delimiter,
        returning occurrences only if all delimiters are paired.

        It is a generator: each item is the text that goes from the start
        of the regex match up to (and including) the paired end delimiter.
        """

        for start, _, end in self._search(regex, line):
            yield line[start:end]

    def sub(self, regex, sub, line, count=0):
        r"""
        This is similar to re.sub:

        It matches a regex that it is followed by a delimiter,
        replacing occurrences only if all delimiters are paired.

        if r'\1' is used, it works just like re: it places there the
        matched paired data with the delimiter stripped.

        A ';' that immediately follows the end delimiter is dropped
        together with the match.

        If count is different than zero, it will replace at most count
        items.
        """
        out = []

        cur_pos = 0
        n = 0

        for start, end, pos in self._search(regex, line):
            # _search() also reports matches nested inside a previous one.
            # Like re.sub, replace only non-overlapping occurrences.
            if start < cur_pos:
                continue

            out.append(line[cur_pos:start])

            # Value, ignoring start/end delimiters
            value = line[end:pos - 1]

            # replaces \1 at the sub string, if \1 is used there
            out.append(sub.replace(r'\1', value))

            # Drop end ';' if any. The end delimiter may be the last
            # character of the line, so check the bounds first.
            if pos < len(line) and line[pos] == ';':
                pos += 1

            cur_pos = pos
            n += 1

            # Stop once the requested number of replacements was done
            if count and n >= count:
                break

        # Append the remaining string
        out.append(line[cur_pos:])

        return "".join(out)