summary refs log tree commit diff stats
path: root/util/unicode.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-08-25 10:11:54 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-08-25 10:11:54 +0100
commitcc9821fa9ac43728a7ece0a5e42e0147e6aadbf4 (patch)
treecdeb23078fab646764f9c0ee38bca585d8be0f89 /util/unicode.c
parente2e6fa67931fdba493e10cc55abcc99a65c92c7b (diff)
parent37aded92c27d0e56cd27f1c29494fc9f8c873cdd (diff)
downloadfocaccia-qemu-cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4.tar.gz
focaccia-qemu-cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4.zip
Merge remote-tracking branch 'remotes/armbru/tags/pull-qobject-2018-08-24' into staging
QObject patches for 2018-08-24

# gpg: Signature made Fri 24 Aug 2018 20:28:53 BST
# gpg:                using RSA key 3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qobject-2018-08-24: (58 commits)
  json: Update references to RFC 7159 to RFC 8259
  json: Support %% in JSON strings when interpolating
  json: Improve safety of qobject_from_jsonf_nofail() & friends
  json: Keep interpolation state in JSONParserContext
  tests/drive_del-test: Fix harmless JSON interpolation bug
  json: Clean up headers
  qobject: Drop superfluous includes of qemu-common.h
  json: Make JSONToken opaque outside json-parser.c
  json: Unbox tokens queue in JSONMessageParser
  json: Streamline json_message_process_token()
  json: Enforce token count and size limits more tightly
  qjson: Have qobject_from_json() & friends reject empty and blank
  json: Assert json_parser_parse() consumes all tokens on success
  json: Fix streamer not to ignore trailing unterminated structures
  json: Fix latent parser aborts at end of input
  qjson: Fix qobject_from_json() & friends for multiple values
  json: Improve names of lexer states related to numbers
  json: Replace %I64d, %I64u by %PRId64, %PRIu64
  json: Leave rejecting invalid interpolation to parser
  json: Pass lexical errors and limit violations to callback
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'util/unicode.c')
-rw-r--r--util/unicode.c69
1 files changed, 62 insertions, 7 deletions
diff --git a/util/unicode.c b/util/unicode.c
index a812a35171..8580bc598b 100644
--- a/util/unicode.c
+++ b/util/unicode.c
@@ -13,6 +13,21 @@
 #include "qemu/osdep.h"
 #include "qemu/unicode.h"
 
+static bool is_valid_codepoint(int codepoint)
+{
+    if (codepoint > 0x10FFFFu) {
+        return false;            /* beyond Unicode range */
+    }
+    if ((codepoint >= 0xFDD0 && codepoint <= 0xFDEF)
+        || (codepoint & 0xFFFE) == 0xFFFE) {
+        return false;            /* noncharacter */
+    }
+    if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
+        return false;            /* surrogate code point */
+    }
+    return true;
+}
+
 /**
  * mod_utf8_codepoint:
  * @s: string encoded in modified UTF-8
@@ -83,13 +98,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end)
             cp <<= 6;
             cp |= byte & 0x3F;
         }
-        if (cp > 0x10FFFF) {
-            cp = -1;            /* beyond Unicode range */
-        } else if ((cp >= 0xFDD0 && cp <= 0xFDEF)
-                   || (cp & 0xFFFE) == 0xFFFE) {
-            cp = -1;            /* noncharacter */
-        } else if (cp >= 0xD800 && cp <= 0xDFFF) {
-            cp = -1;            /* surrogate code point */
+        if (!is_valid_codepoint(cp)) {
+            cp = -1;
         } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
             cp = -1;            /* overlong, not \xC0\x80 */
         }
@@ -99,3 +109,48 @@ out:
     *end = (char *)p;
     return cp;
 }
+
+/**
+ * mod_utf8_encode:
+ * @buf: Destination buffer
+ * @bufsz: size of @buf, at least 5.
+ * @codepoint: Unicode codepoint to encode
+ *
+ * Convert Unicode codepoint @codepoint to modified UTF-8.
+ *
+ * Returns: the length of the UTF-8 sequence on success, -1 when
+ * @codepoint is invalid.
+ */
+ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint)
+{
+    assert(bufsz >= 5);
+
+    if (!is_valid_codepoint(codepoint)) {
+        return -1;
+    }
+
+    if (codepoint > 0 && codepoint <= 0x7F) {
+        buf[0] = codepoint & 0x7F;
+        buf[1] = 0;
+        return 1;
+    }
+    if (codepoint <= 0x7FF) {
+        buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F);
+        buf[1] = 0x80 | (codepoint & 0x3F);
+        buf[2] = 0;
+        return 2;
+    }
+    if (codepoint <= 0xFFFF) {
+        buf[0] = 0xE0 | ((codepoint >> 12) & 0x0F);
+        buf[1] = 0x80 | ((codepoint >> 6) & 0x3F);
+        buf[2] = 0x80 | (codepoint & 0x3F);
+        buf[3] = 0;
+        return 3;
+    }
+    buf[0] = 0xF0 | ((codepoint >> 18) & 0x07);
+    buf[1] = 0x80 | ((codepoint >> 12) & 0x3F);
+    buf[2] = 0x80 | ((codepoint >> 6) & 0x3F);
+    buf[3] = 0x80 | (codepoint & 0x3F);
+    buf[4] = 0;
+    return 4;
+}