diff options
| author | Peter Maydell <peter.maydell@linaro.org> | 2018-08-25 10:11:54 +0100 |
|---|---|---|
| committer | Peter Maydell <peter.maydell@linaro.org> | 2018-08-25 10:11:54 +0100 |
| commit | cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4 (patch) | |
| tree | cdeb23078fab646764f9c0ee38bca585d8be0f89 /util/unicode.c | |
| parent | e2e6fa67931fdba493e10cc55abcc99a65c92c7b (diff) | |
| parent | 37aded92c27d0e56cd27f1c29494fc9f8c873cdd (diff) | |
| download | focaccia-qemu-cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4.tar.gz focaccia-qemu-cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4.zip | |
Merge remote-tracking branch 'remotes/armbru/tags/pull-qobject-2018-08-24' into staging
QObject patches for 2018-08-24 # gpg: Signature made Fri 24 Aug 2018 20:28:53 BST # gpg: using RSA key 3870B400EB918653 # gpg: Good signature from "Markus Armbruster <armbru@redhat.com>" # gpg: aka "Markus Armbruster <armbru@pond.sub.org>" # Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867 4E5F 3870 B400 EB91 8653 * remotes/armbru/tags/pull-qobject-2018-08-24: (58 commits) json: Update references to RFC 7159 to RFC 8259 json: Support %% in JSON strings when interpolating json: Improve safety of qobject_from_jsonf_nofail() & friends json: Keep interpolation state in JSONParserContext tests/drive_del-test: Fix harmless JSON interpolation bug json: Clean up headers qobject: Drop superfluous includes of qemu-common.h json: Make JSONToken opaque outside json-parser.c json: Unbox tokens queue in JSONMessageParser json: Streamline json_message_process_token() json: Enforce token count and size limits more tightly qjson: Have qobject_from_json() & friends reject empty and blank json: Assert json_parser_parse() consumes all tokens on success json: Fix streamer not to ignore trailing unterminated structures json: Fix latent parser aborts at end of input qjson: Fix qobject_from_json() & friends for multiple values json: Improve names of lexer states related to numbers json: Replace %I64d, %I64u by %PRId64, %PRIu64 json: Leave rejecting invalid interpolation to parser json: Pass lexical errors and limit violations to callback ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'util/unicode.c')
| -rw-r--r-- | util/unicode.c | 69 |
1 files changed, 62 insertions, 7 deletions
diff --git a/util/unicode.c b/util/unicode.c index a812a35171..8580bc598b 100644 --- a/util/unicode.c +++ b/util/unicode.c @@ -13,6 +13,21 @@ #include "qemu/osdep.h" #include "qemu/unicode.h" +static bool is_valid_codepoint(int codepoint) +{ + if (codepoint > 0x10FFFFu) { + return false; /* beyond Unicode range */ + } + if ((codepoint >= 0xFDD0 && codepoint <= 0xFDEF) + || (codepoint & 0xFFFE) == 0xFFFE) { + return false; /* noncharacter */ + } + if (codepoint >= 0xD800 && codepoint <= 0xDFFF) { + return false; /* surrogate code point */ + } + return true; +} + /** * mod_utf8_codepoint: * @s: string encoded in modified UTF-8 @@ -83,13 +98,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end) cp <<= 6; cp |= byte & 0x3F; } - if (cp > 0x10FFFF) { - cp = -1; /* beyond Unicode range */ - } else if ((cp >= 0xFDD0 && cp <= 0xFDEF) - || (cp & 0xFFFE) == 0xFFFE) { - cp = -1; /* noncharacter */ - } else if (cp >= 0xD800 && cp <= 0xDFFF) { - cp = -1; /* surrogate code point */ + if (!is_valid_codepoint(cp)) { + cp = -1; } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) { cp = -1; /* overlong, not \xC0\x80 */ } @@ -99,3 +109,48 @@ out: *end = (char *)p; return cp; } + +/** + * mod_utf8_encode: + * @buf: Destination buffer + * @bufsz: size of @buf, at least 5. + * @codepoint: Unicode codepoint to encode + * + * Convert Unicode codepoint @codepoint to modified UTF-8. + * + * Returns: the length of the UTF-8 sequence on success, -1 when + * @codepoint is invalid. + */ +ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint) +{ + assert(bufsz >= 5); + + if (!is_valid_codepoint(codepoint)) { + return -1; + } + + if (codepoint > 0 && codepoint <= 0x7F) { + buf[0] = codepoint & 0x7F; + buf[1] = 0; + return 1; + } + if (codepoint <= 0x7FF) { + buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F); + buf[1] = 0x80 | (codepoint & 0x3F); + buf[2] = 0; + return 2; + } + if (codepoint <= 0xFFFF) { + buf[0] = 0xE0 | ((codepoint >> 12) & 0x0F); + buf[1] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[2] = 0x80 | (codepoint & 0x3F); + buf[3] = 0; + return 3; + } + buf[0] = 0xF0 | ((codepoint >> 18) & 0x07); + buf[1] = 0x80 | ((codepoint >> 12) & 0x3F); + buf[2] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[3] = 0x80 | (codepoint & 0x3F); + buf[4] = 0; + return 4; +} |