From 00ea57fadcb899b7e21079dacb47b6fb8af5b2fa Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:47 +0200 Subject: json: Tighten and simplify qstring_from_escaped_str()'s loop Simplify loop control, and assert that the string ends with the appropriate quote (the lexer ensures it does). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-21-armbru@redhat.com> --- qobject/json-parser.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a5aa790d62..164b86769b 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -132,65 +132,49 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, { const char *ptr = token->str; QString *str; - int double_quote = 1; - - if (*ptr == '"') { - double_quote = 1; - } else { - double_quote = 0; - } - ptr++; + char quote; + assert(*ptr == '"' || *ptr == '\''); + quote = *ptr++; str = qstring_new(); - while (*ptr && - ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { + + while (*ptr != quote) { + assert(*ptr); if (*ptr == '\\') { ptr++; - - switch (*ptr) { + switch (*ptr++) { case '"': qstring_append(str, "\""); - ptr++; break; case '\'': qstring_append(str, "'"); - ptr++; break; case '\\': qstring_append(str, "\\"); - ptr++; break; case '/': qstring_append(str, "/"); - ptr++; break; case 'b': qstring_append(str, "\b"); - ptr++; break; case 'f': qstring_append(str, "\f"); - ptr++; break; case 'n': qstring_append(str, "\n"); - ptr++; break; case 'r': qstring_append(str, "\r"); - ptr++; break; case 't': qstring_append(str, "\t"); - ptr++; break; case 'u': { uint16_t unicode_char = 0; char utf8_char[4]; int i = 0; - ptr++; - for (i = 0; i < 4; i++) { if (qemu_isxdigit(*ptr)) { unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); -- cgit 1.4.1 From e59f39d40397645477b959255aedfa17a7c9c779 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:49 +0200 Subject: json: Reject invalid UTF-8 sequences We reject bytes that can't occur in valid UTF-8 (\xC0..\xC1, \xF5..\xFF in the lexer. That's insufficient; there's plenty of invalid UTF-8 not containing these bytes, as demonstrated by check-qjson: * Malformed sequences - Unexpected continuation bytes - Missing continuation bytes after start bytes other than \xC0..\xC1, \xF5..\xFD. * Overlong sequences with start bytes other than \xC0..\xC1, \xF5..\xFD. * Invalid code points Fixing this in the lexer would be bothersome. Fixing it in the parser is straightforward, so do that. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-23-armbru@redhat.com> --- include/qemu/unicode.h | 1 + qobject/json-parser.c | 20 +++++--- tests/check-qjson.c | 137 ++++++++++++++++--------------------------------- util/unicode.c | 69 ++++++++++++++++++++++--- 4 files changed, 122 insertions(+), 105 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/include/qemu/unicode.h b/include/qemu/unicode.h index 71c72db461..7fa10b8e60 100644 --- a/include/qemu/unicode.h +++ b/include/qemu/unicode.h @@ -2,5 +2,6 @@ #define QEMU_UNICODE_H int mod_utf8_codepoint(const char *s, size_t n, char **end); +ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint); #endif diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 164b86769b..0e232ff101 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/unicode.h" #include "qapi/error.h" #include "qemu-common.h" #include "qapi/qmp/qbool.h" @@ -133,6 +134,10 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, const char *ptr = token->str; QString *str; char quote; + int cp; + char *end; + ssize_t len; + char utf8_buf[5]; assert(*ptr == '"' || *ptr == '\''); quote = *ptr++; @@ -194,12 +199,15 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, goto out; } } else { - char dummy[2]; - - dummy[0] = *ptr++; - dummy[1] = 0; - - qstring_append(str, dummy); + cp = mod_utf8_codepoint(ptr, 6, &end); + if (cp <= 0) { + parse_error(ctxt, token, "invalid UTF-8 sequence in string"); + goto out; + } + ptr = end; + len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp); + assert(len >= 0); + qstring_append(str, utf8_buf); } } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 69f5a187c9..71c77d2f70 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -152,13 +152,6 @@ static void string_with_quotes(void) static void utf8_string(void) { /* - * FIXME Current behavior for invalid UTF-8 sequences is - * incorrect. This test expects current, incorrect results. - * They're all marked "bug:" below, and are to be replaced by - * correct ones as the bugs get fixed. - * - * The JSON parser rejects some, but not all invalid sequences. - * * Problem: we can't easily deal with embedded U+0000. Parsing * the JSON string "this \\u0000" is fun" yields "this \0 is fun", * which gets misinterpreted as NUL-terminated "this ". We should @@ -177,12 +170,6 @@ static void utf8_string(void) /* Expected unparse output, defaults to @json_in */ const char *json_out; } test_cases[] = { - /* - * Bug markers used here: - * - bug: not rejected - * JSON parser fails to reject invalid sequence(s) - */ - /* 0 Control characters */ { /* @@ -330,7 +317,7 @@ static void utf8_string(void) { /* first one beyond Unicode range: U+110000 */ "\xF4\x90\x80\x80", - "\xF4\x90\x80\x80", + NULL, "\\uFFFD", }, /* 3 Malformed sequences */ @@ -338,49 +325,49 @@ static void utf8_string(void) /* 3.1.1 First continuation byte */ { "\x80", - "\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* 3.1.2 Last continuation byte */ { "\xBF", - "\xBF", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* 3.1.3 2 continuation bytes */ { "\x80\xBF", - "\x80\xBF", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, /* 3.1.4 3 continuation bytes */ { "\x80\xBF\x80", - "\x80\xBF\x80", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.5 4 continuation bytes */ { "\x80\xBF\x80\xBF", - "\x80\xBF\x80\xBF", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.6 5 continuation bytes */ { "\x80\xBF\x80\xBF\x80", - "\x80\xBF\x80\xBF\x80", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.7 6 continuation bytes */ { "\x80\xBF\x80\xBF\x80\xBF", - "\x80\xBF\x80\xBF\x80\xBF", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.8 7 continuation bytes */ { "\x80\xBF\x80\xBF\x80\xBF\x80", - "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.9 Sequence of all 64 possible continuation bytes */ @@ -393,16 +380,7 @@ static void utf8_string(void) "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", - /* bug: not rejected */ - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" - "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" - "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" - "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" - "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", - "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" @@ -410,6 +388,7 @@ static void utf8_string(void) "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.2 Lonely start characters */ /* 3.2.1 All 32 first bytes of 2-byte sequences, followed by space */ @@ -418,7 +397,7 @@ static void utf8_string(void) "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF " "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 " "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ", - NULL, /* bug: accepted partly, see FIXME below */ + NULL, "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " @@ -428,16 +407,14 @@ static void utf8_string(void) { "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 " "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ", - /* bug: not rejected */ - "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 " - "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ", + NULL, "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.3 All 8 first bytes of 4-byte sequences, followed by space */ { "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ", - NULL, /* bug: accepted partly, see FIXME below */ + NULL, "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.4 All 4 first bytes of 5-byte sequences, followed by space */ @@ -462,13 +439,13 @@ static void utf8_string(void) /* 3.3.2 3-byte sequence with last byte missing (U+0000) */ { "\xE0\x80", - "\xE0\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* 3.3.3 4-byte sequence with last byte missing (U+0000) */ { "\xF0\x80\x80", - "\xF0\x80\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* 3.3.4 5-byte sequence with last byte missing (U+0000) */ @@ -486,13 +463,13 @@ static void utf8_string(void) /* 3.3.6 2-byte sequence with last byte missing (U+07FF) */ { "\xDF", - "\xDF", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* 3.3.7 3-byte sequence with last byte missing (U+FFFF) */ { "\xEF\xBF", - "\xEF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* 3.3.8 4-byte sequence with last byte missing (U+1FFFFF) */ @@ -517,7 +494,7 @@ static void utf8_string(void) { "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80" "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF", - NULL, /* bug: accepted partly, see FIXME below */ + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, @@ -546,12 +523,12 @@ static void utf8_string(void) }, { "\xE0\x80\xAF", - "\xE0\x80\xAF", /* bug: not rejected */ + NULL, "\\uFFFD", }, { "\xF0\x80\x80\xAF", - "\xF0\x80\x80\xAF", /* bug: not rejected */ + NULL, "\\uFFFD", }, { @@ -579,7 +556,7 @@ static void utf8_string(void) { /* \U+07FF */ "\xE0\x9F\xBF", - "\xE0\x9F\xBF", /* bug: not rejected */ + NULL, "\\uFFFD", }, { @@ -590,7 +567,7 @@ static void utf8_string(void) * also 2.2.3 */ "\xF0\x8F\xBF\xBC", - "\xF0\x8F\xBF\xBC", /* bug: not rejected */ + NULL, "\\uFFFD", }, { @@ -615,13 +592,13 @@ static void utf8_string(void) { /* \U+0000 */ "\xE0\x80\x80", - "\xE0\x80\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+0000 */ "\xF0\x80\x80\x80", - "\xF0\x80\x80\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, { @@ -641,92 +618,92 @@ static void utf8_string(void) { /* \U+D800 */ "\xED\xA0\x80", - "\xED\xA0\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+DB7F */ "\xED\xAD\xBF", - "\xED\xAD\xBF", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+DB80 */ "\xED\xAE\x80", - "\xED\xAE\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+DBFF */ "\xED\xAF\xBF", - "\xED\xAF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+DC00 */ "\xED\xB0\x80", - "\xED\xB0\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+DF80 */ "\xED\xBE\x80", - "\xED\xBE\x80", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+DFFF */ "\xED\xBF\xBF", - "\xED\xBF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* 5.2 Paired UTF-16 surrogates */ { /* \U+D800\U+DC00 */ "\xED\xA0\x80\xED\xB0\x80", - "\xED\xA0\x80\xED\xB0\x80", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, { /* \U+D800\U+DFFF */ "\xED\xA0\x80\xED\xBF\xBF", - "\xED\xA0\x80\xED\xBF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, { /* \U+DB7F\U+DC00 */ "\xED\xAD\xBF\xED\xB0\x80", - "\xED\xAD\xBF\xED\xB0\x80", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, { /* \U+DB7F\U+DFFF */ "\xED\xAD\xBF\xED\xBF\xBF", - "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, { /* \U+DB80\U+DC00 */ "\xED\xAE\x80\xED\xB0\x80", - "\xED\xAE\x80\xED\xB0\x80", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, { /* \U+DB80\U+DFFF */ "\xED\xAE\x80\xED\xBF\xBF", - "\xED\xAE\x80\xED\xBF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, { /* \U+DBFF\U+DC00 */ "\xED\xAF\xBF\xED\xB0\x80", - "\xED\xAF\xBF\xED\xB0\x80", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, { /* \U+DBFF\U+DFFF */ "\xED\xAF\xBF\xED\xBF\xBF", - "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD\\uFFFD", }, /* 5.3 Other illegal code positions */ @@ -734,25 +711,25 @@ static void utf8_string(void) { /* \U+FFFE */ "\xEF\xBF\xBE", - "\xEF\xBF\xBE", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* \U+FFFF */ "\xEF\xBF\xBF", - "\xEF\xBF\xBF", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* U+FDD0 */ "\xEF\xB7\x90", - "\xEF\xB7\x90", /* bug: not rejected */ + NULL, "\\uFFFD", }, { /* U+FDEF */ "\xEF\xB7\xAF", - "\xEF\xB7\xAF", /* bug: not rejected */ + NULL, "\\uFFFD", }, /* Plane 1 .. 16 noncharacters */ @@ -774,23 +751,7 @@ static void utf8_string(void) "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF" "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF" "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF", - /* bug: not rejected */ - "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF" - "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF" - "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF" - "\xF1\x8F\xBF\xBE\xF1\x8F\xBF\xBF" - "\xF1\x9F\xBF\xBE\xF1\x9F\xBF\xBF" - "\xF1\xAF\xBF\xBE\xF1\xAF\xBF\xBF" - "\xF1\xBF\xBF\xBE\xF1\xBF\xBF\xBF" - "\xF2\x8F\xBF\xBE\xF2\x8F\xBF\xBF" - "\xF2\x9F\xBF\xBE\xF2\x9F\xBF\xBF" - "\xF2\xAF\xBF\xBE\xF2\xAF\xBF\xBF" - "\xF2\xBF\xBF\xBE\xF2\xBF\xBF\xBF" - "\xF3\x8F\xBF\xBE\xF3\x8F\xBF\xBF" - "\xF3\x9F\xBF\xBE\xF3\x9F\xBF\xBF" - "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF" - "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF" - "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF", + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" @@ -829,14 +790,6 @@ static void utf8_string(void) } in = strndup(tail, end - tail); str = from_json_str(in, j, NULL); - /* - * FIXME JSON parser accepts invalid sequence - * starting with \xC2..\xF4 - */ - if (*in >= '\xC2' && *in <= '\xF4') { - g_free(str); - str = NULL; - } g_assert(!str); g_free(in); } diff --git a/util/unicode.c b/util/unicode.c index a812a35171..8580bc598b 100644 --- a/util/unicode.c +++ b/util/unicode.c @@ -13,6 +13,21 @@ #include "qemu/osdep.h" #include "qemu/unicode.h" +static bool is_valid_codepoint(int codepoint) +{ + if (codepoint > 0x10FFFFu) { + return false; /* beyond Unicode range */ + } + if ((codepoint >= 0xFDD0 && codepoint <= 0xFDEF) + || (codepoint & 0xFFFE) == 0xFFFE) { + return false; /* noncharacter */ + } + if (codepoint >= 0xD800 && codepoint <= 0xDFFF) { + return false; /* surrogate code point */ + } + return true; +} + /** * mod_utf8_codepoint: * @s: string encoded in modified UTF-8 @@ -83,13 +98,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end) cp <<= 6; cp |= byte & 0x3F; } - if (cp > 0x10FFFF) { - cp = -1; /* beyond Unicode range */ - } else if ((cp >= 0xFDD0 && cp <= 0xFDEF) - || (cp & 0xFFFE) == 0xFFFE) { - cp = -1; /* noncharacter */ - } else if (cp >= 0xD800 && cp <= 0xDFFF) { - cp = -1; /* surrogate code point */ + if (!is_valid_codepoint(cp)) { + cp = -1; } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) { cp = -1; /* overlong, not \xC0\x80 */ } @@ -99,3 +109,48 @@ out: *end = (char *)p; return cp; } + +/** + * mod_utf8_encode: + * @buf: Destination buffer + * @bufsz: size of @buf, at least 5. + * @codepoint: Unicode codepoint to encode + * + * Convert Unicode codepoint @codepoint to modified UTF-8. + * + * Returns: the length of the UTF-8 sequence on success, -1 when + * @codepoint is invalid. + */ +ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint) +{ + assert(bufsz >= 5); + + if (!is_valid_codepoint(codepoint)) { + return -1; + } + + if (codepoint > 0 && codepoint <= 0x7F) { + buf[0] = codepoint & 0x7F; + buf[1] = 0; + return 1; + } + if (codepoint <= 0x7FF) { + buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F); + buf[1] = 0x80 | (codepoint & 0x3F); + buf[2] = 0; + return 2; + } + if (codepoint <= 0xFFFF) { + buf[0] = 0xE0 | ((codepoint >> 12) & 0x0F); + buf[1] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[2] = 0x80 | (codepoint & 0x3F); + buf[3] = 0; + return 3; + } + buf[0] = 0xF0 | ((codepoint >> 18) & 0x07); + buf[1] = 0x80 | ((codepoint >> 12) & 0x3F); + buf[2] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[3] = 0x80 | (codepoint & 0x3F); + buf[4] = 0; + return 4; +} -- cgit 1.4.1 From 574bf16ff1f836a442d8de1853eb248c218a591d Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:50 +0200 Subject: json: Report first rather than last parse error Quiz time! When a parser reports multiple errors, but the user gets to see just one, which one is (on average) the least useful one? Yes, you're right, it's the last one! You're clearly familiar with compilers. Which one does QEMU report? Right again, the last one! You're clearly familiar with QEMU. Reproducer: feeding {"abc\xC2ijk": 1}\n to QMP produces {"error": {"class": "GenericError", "desc": "JSON parse error, key is not a string in object"}} Report the first error instead. The reproducer now produces {"error": {"class": "GenericError", "desc": "JSON parse error, invalid UTF-8 sequence in string"}} Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-24-armbru@redhat.com> --- qobject/json-parser.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 0e232ff101..b77931614b 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -54,13 +54,13 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, { va_list ap; char message[1024]; + + if (ctxt->err) { + return; + } va_start(ap, msg); vsnprintf(message, sizeof(message), msg, ap); va_end(ap); - if (ctxt->err) { - error_free(ctxt->err); - ctxt->err = NULL; - } error_setg(&ctxt->err, "JSON parse error, %s", message); } -- cgit 1.4.1 From 4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:52 +0200 Subject: json: Accept overlong \xC0\x80 as U+0000 ("modified UTF-8") Since the JSON grammer doesn't accept U+0000 anywhere, this merely exchanges one kind of parse error for another. It's purely for consistency with qobject_to_json(), which accepts \xC0\x80 (see commit e2ec3f97680). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-26-armbru@redhat.com> --- qobject/json-lexer.c | 2 +- qobject/json-parser.c | 2 +- tests/check-qjson.c | 8 +------- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 93fa2737e6..4c402f62d3 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -93,7 +93,7 @@ * interpolation = %((l|ll|I64)[du]|[ipsf]) * * Note: - * - Input must be encoded in UTF-8. + * - Input must be encoded in modified UTF-8. * - Decoding and validating is left to the parser. */ diff --git a/qobject/json-parser.c b/qobject/json-parser.c index b77931614b..a9b227f56c 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -200,7 +200,7 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, } } else { cp = mod_utf8_codepoint(ptr, 6, &end); - if (cp <= 0) { + if (cp < 0) { parse_error(ctxt, token, "invalid UTF-8 sequence in string"); goto out; } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 71c77d2f70..3abf12b4d2 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -152,12 +152,6 @@ static void string_with_quotes(void) static void utf8_string(void) { /* - * Problem: we can't easily deal with embedded U+0000. Parsing - * the JSON string "this \\u0000" is fun" yields "this \0 is fun", - * which gets misinterpreted as NUL-terminated "this ". We should - * consider using overlong encoding \xC0\x80 for U+0000 ("modified - * UTF-8"). - * * Most test cases are scraped from Markus Kuhn's UTF-8 decoder * capability and stress test at * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt @@ -586,7 +580,7 @@ static void utf8_string(void) { /* \U+0000 */ "\xC0\x80", - NULL, + "\xC0\x80", "\\u0000", }, { -- cgit 1.4.1 From b2da4a4d7537567b44db60b7b79cd14f64e48f2f Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:53 +0200 Subject: json: Leave rejecting invalid escape sequences to parser Both lexer and parser reject invalid escape sequences in strings. The parser's check is useless. The lexer ends the token right after the first non-well-formed byte. This tends to lead to suboptimal error reporting. For instance, input {"abc\@ijk": 1} produces the tokens JSON_LCURLY { JSON_ERROR "abc\@ JSON_KEYWORD ijk JSON_ERROR ": 1}\n The parser then reports three errors Invalid JSON syntax JSON parse error, invalid keyword 'ijk' Invalid JSON syntax before it recovers at the newline. Drop the lexer's escape sequence checking, and make it accept the same characters after backslash it accepts elsewhere in strings. It now produces JSON_LCURLY { JSON_STRING "abc\@ijk" JSON_COLON : JSON_INTEGER 1 JSON_RCURLY and the parser reports just JSON parse error, invalid escape sequence in string While there, fix parse_string()'s inaccurate function comment. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-27-armbru@redhat.com> --- qobject/json-lexer.c | 72 +++------------------------------------------------ qobject/json-parser.c | 56 +++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 91 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 4c402f62d3..0731779470 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -80,6 +80,8 @@ * escape = %x5C ; \ * quotation-mark = %x22 ; " * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * [This lexer accepts any non-control character after escape, and + * leaves rejecting invalid ones to the parser.] * * * Extensions over RFC 8259: @@ -99,16 +101,8 @@ enum json_lexer_state { IN_ERROR = 0, /* must really be 0, see json_lexer[] */ - IN_DQ_UCODE3, - IN_DQ_UCODE2, - IN_DQ_UCODE1, - IN_DQ_UCODE0, IN_DQ_STRING_ESCAPE, IN_DQ_STRING, - IN_SQ_UCODE3, - IN_SQ_UCODE2, - IN_SQ_UCODE1, - IN_SQ_UCODE0, IN_SQ_STRING_ESCAPE, IN_SQ_STRING, IN_ZERO, @@ -144,37 +138,8 @@ static const uint8_t json_lexer[][256] = { /* Relies on default initialization to IN_ERROR! */ /* double quote string */ - [IN_DQ_UCODE3] = { - ['0' ... '9'] = IN_DQ_STRING, - ['a' ... 'f'] = IN_DQ_STRING, - ['A' ... 'F'] = IN_DQ_STRING, - }, - [IN_DQ_UCODE2] = { - ['0' ... '9'] = IN_DQ_UCODE3, - ['a' ... 'f'] = IN_DQ_UCODE3, - ['A' ... 'F'] = IN_DQ_UCODE3, - }, - [IN_DQ_UCODE1] = { - ['0' ... '9'] = IN_DQ_UCODE2, - ['a' ... 'f'] = IN_DQ_UCODE2, - ['A' ... 'F'] = IN_DQ_UCODE2, - }, - [IN_DQ_UCODE0] = { - ['0' ... '9'] = IN_DQ_UCODE1, - ['a' ... 'f'] = IN_DQ_UCODE1, - ['A' ... 'F'] = IN_DQ_UCODE1, - }, [IN_DQ_STRING_ESCAPE] = { - ['b'] = IN_DQ_STRING, - ['f'] = IN_DQ_STRING, - ['n'] = IN_DQ_STRING, - ['r'] = IN_DQ_STRING, - ['t'] = IN_DQ_STRING, - ['/'] = IN_DQ_STRING, - ['\\'] = IN_DQ_STRING, - ['\''] = IN_DQ_STRING, - ['\"'] = IN_DQ_STRING, - ['u'] = IN_DQ_UCODE0, + [0x20 ... 0xFD] = IN_DQ_STRING, }, [IN_DQ_STRING] = { [0x20 ... 0xFD] = IN_DQ_STRING, @@ -183,37 +148,8 @@ static const uint8_t json_lexer[][256] = { }, /* single quote string */ - [IN_SQ_UCODE3] = { - ['0' ... '9'] = IN_SQ_STRING, - ['a' ... 'f'] = IN_SQ_STRING, - ['A' ... 'F'] = IN_SQ_STRING, - }, - [IN_SQ_UCODE2] = { - ['0' ... '9'] = IN_SQ_UCODE3, - ['a' ... 'f'] = IN_SQ_UCODE3, - ['A' ... 'F'] = IN_SQ_UCODE3, - }, - [IN_SQ_UCODE1] = { - ['0' ... '9'] = IN_SQ_UCODE2, - ['a' ... 'f'] = IN_SQ_UCODE2, - ['A' ... 'F'] = IN_SQ_UCODE2, - }, - [IN_SQ_UCODE0] = { - ['0' ... '9'] = IN_SQ_UCODE1, - ['a' ... 'f'] = IN_SQ_UCODE1, - ['A' ... 'F'] = IN_SQ_UCODE1, - }, [IN_SQ_STRING_ESCAPE] = { - ['b'] = IN_SQ_STRING, - ['f'] = IN_SQ_STRING, - ['n'] = IN_SQ_STRING, - ['r'] = IN_SQ_STRING, - ['t'] = IN_SQ_STRING, - ['/'] = IN_SQ_STRING, - ['\\'] = IN_SQ_STRING, - ['\''] = IN_SQ_STRING, - ['\"'] = IN_SQ_STRING, - ['u'] = IN_SQ_UCODE0, + [0x20 ... 0xFD] = IN_SQ_STRING, }, [IN_SQ_STRING] = { [0x20 ... 0xFD] = IN_SQ_STRING, diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a9b227f56c..7437827c24 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -106,30 +106,40 @@ static int hex2decimal(char ch) } /** - * parse_string(): Parse a json string and return a QObject + * parse_string(): Parse a JSON string * - * string - * "" - * " chars " - * chars - * char - * char chars - * char - * any-Unicode-character- - * except-"-or-\-or- - * control-character - * \" - * \\ - * \/ - * \b - * \f - * \n - * \r - * \t - * \u four-hex-digits + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format": + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. + * + * Note: + * - Encoding is modified UTF-8. + * - Invalid Unicode characters are rejected. + * - Control characters \x00..\x1F are rejected by the lexer. */ -static QString *qstring_from_escaped_str(JSONParserContext *ctxt, - JSONToken *token) +static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) { const char *ptr = token->str; QString *str; @@ -495,7 +505,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) switch (token->type) { case JSON_STRING: - return QOBJECT(qstring_from_escaped_str(ctxt, token)); + return QOBJECT(parse_string(ctxt, token)); case JSON_INTEGER: { /* * Represent JSON_INTEGER as QNUM_I64 if possible, else as -- cgit 1.4.1 From de6decfe8e2939464fc27ac2ab4ef87689329fec Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:54 +0200 Subject: json: Simplify parse_string() Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-28-armbru@redhat.com> --- qobject/json-parser.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 7437827c24..9cb363f7e1 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -101,8 +101,7 @@ static int hex2decimal(char ch) } else if (ch >= 'A' && ch <= 'F') { return 10 + (ch - 'A'); } - - return -1; + abort(); } /** @@ -144,7 +143,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) const char *ptr = token->str; QString *str; char quote; - int cp; + int cp, i; char *end; ssize_t len; char utf8_buf[5]; @@ -159,51 +158,48 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) ptr++; switch (*ptr++) { case '"': - qstring_append(str, "\""); + qstring_append_chr(str, '"'); break; case '\'': - qstring_append(str, "'"); + qstring_append_chr(str, '\''); break; case '\\': - qstring_append(str, "\\"); + qstring_append_chr(str, '\\'); break; case '/': - qstring_append(str, "/"); + qstring_append_chr(str, '/'); break; case 'b': - qstring_append(str, "\b"); + qstring_append_chr(str, '\b'); break; case 'f': - qstring_append(str, "\f"); + qstring_append_chr(str, '\f'); break; case 'n': - qstring_append(str, "\n"); + qstring_append_chr(str, '\n'); break; case 'r': - qstring_append(str, "\r"); + qstring_append_chr(str, '\r'); break; case 't': - qstring_append(str, "\t"); + qstring_append_chr(str, '\t'); break; - case 'u': { - uint16_t unicode_char = 0; - char utf8_char[4]; - int i = 0; - + case 'u': + cp = 0; for (i = 0; i < 4; i++) { - if (qemu_isxdigit(*ptr)) { - unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); - } else { + if (!qemu_isxdigit(*ptr)) { parse_error(ctxt, token, "invalid hex escape sequence in string"); goto out; } + cp <<= 4; + cp |= hex2decimal(*ptr); ptr++; } - wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); - qstring_append(str, utf8_char); - } break; + wchar_to_utf8(cp, utf8_buf, sizeof(utf8_buf)); + qstring_append(str, utf8_buf); + break; default: parse_error(ctxt, token, "invalid escape sequence in string"); goto out; -- cgit 1.4.1 From 46a628b1398ae6a58d6847223736431225c4c0cc Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:55 +0200 Subject: json: Reject invalid \uXXXX, fix \u0000 The JSON parser translates invalid \uXXXX to garbage instead of rejecting it, and swallows \u0000. Fix by using mod_utf8_encode() instead of flawed wchar_to_utf8(). Valid surrogate pairs are now differently broken: they're rejected instead of translated to garbage. The next commit will fix them. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-29-armbru@redhat.com> --- qobject/json-parser.c | 35 ++++++----------------------------- tests/check-qjson.c | 41 +++++++++++------------------------------ 2 files changed, 17 insertions(+), 59 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 9cb363f7e1..e49da192fe 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -64,34 +64,6 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, error_setg(&ctxt->err, "JSON parse error, %s", message); } -/** - * String helpers - * - * These helpers are used to unescape strings. - */ -static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) -{ - if (wchar <= 0x007F) { - BUG_ON(buffer_length < 2); - - buffer[0] = wchar & 0x7F; - buffer[1] = 0; - } else if (wchar <= 0x07FF) { - BUG_ON(buffer_length < 3); - - buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); - buffer[1] = 0x80 | (wchar & 0x3F); - buffer[2] = 0; - } else { - BUG_ON(buffer_length < 4); - - buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); - buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); - buffer[2] = 0x80 | (wchar & 0x3F); - buffer[3] = 0; - } -} - static int hex2decimal(char ch) { if (ch >= '0' && ch <= '9') { @@ -197,7 +169,12 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) ptr++; } - wchar_to_utf8(cp, utf8_buf, sizeof(utf8_buf)); + if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) { + parse_error(ctxt, token, + "\\u%.4s is not a valid Unicode character", + ptr - 3); + goto out; + } qstring_append(str, utf8_buf); break; default: diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 3abf12b4d2..4abb5847ad 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -64,7 +64,7 @@ static void escaped_string(void) { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" }, { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */ /* bug: want \xF0\x9D\x84\x9E */ - "quadruple byte utf-8 \xED\xA0\xB4\xED\xB4\x9E", .skip = 1 }, + NULL }, { "\\", NULL }, { "\\z", NULL }, { "\\ux", NULL }, @@ -72,35 +72,16 @@ static void escaped_string(void) { "\\u12x", NULL }, { "\\u123x", NULL }, { "\\u12345", "\341\210\2645" }, - { "\\u0000x", "x", .skip = 1}, /* bug: want \xC0\x80x */ - { "unpaired leading surrogate \\uD800", - /* bug: not rejected */ - "unpaired leading surrogate \355\240\200", .skip = 1 }, - { "unpaired leading surrogate \\uD800\\uCAFE", - /* bug: not rejected */ - "unpaired leading surrogate \355\240\200\354\253\276", .skip = 1 }, - { "unpaired leading surrogate \\uD800\\uD801\\uDC02", - /* bug: not rejected */ - "unpaired leading surrogate \355\240\200\355\240\201\355\260\202", - .skip = 1 }, - { "unpaired trailing surrogate \\uDC00", - /* bug: not rejected */ - "unpaired trailing surrogate \355\260\200", .skip = 1}, - { "backward surrogate pair \\uDC00\\uD800", - /* bug: not rejected */ - "backward surrogate pair \355\260\200\355\240\200", .skip = 1}, - { "noncharacter U+FDD0 \\uFDD0", - /* bug: not rejected */ - "noncharacter U+FDD0 \xEF\xB7\x90", .skip = 1}, - { "noncharacter U+FDEF \\uFDEF", - /* bug: not rejected */ - "noncharacter U+FDEF \xEF\xB7\xAF", .skip = 1}, - { "noncharacter U+1FFFE \\uD87F\\uDFFE", - /* bug: not rejected */ - "noncharacter U+1FFFE \xED\xA1\xBF\xED\xBF\xBE", .skip = 1}, - { "noncharacter U+10FFFF \\uDC3F\\uDFFF", - /* bug: not rejected */ - "noncharacter U+10FFFF \xED\xB0\xBF\xED\xBF\xBF", .skip = 1}, + { "\\u0000x", "\xC0\x80x" }, + { "unpaired leading surrogate \\uD800", NULL }, + { "unpaired leading surrogate \\uD800\\uCAFE", NULL }, + { "unpaired leading surrogate \\uD800\\uD801\\uDC02", NULL }, + { "unpaired trailing surrogate \\uDC00", NULL }, + { "backward surrogate pair \\uDC00\\uD800", NULL }, + { "noncharacter U+FDD0 \\uFDD0", NULL }, + { "noncharacter U+FDEF \\uFDEF", NULL }, + { "noncharacter U+1FFFE \\uD87F\\uDFFE", NULL }, + { "noncharacter U+10FFFF \\uDC3F\\uDFFF", NULL }, {} }; int i, j; -- cgit 1.4.1 From dc45a07c3628b82817a96fcb7df3d211d901af5d Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:56 +0200 Subject: json: Fix \uXXXX for surrogate pairs The JSON parser treats each half of a surrogate pair as unpaired surrogate. Fix it to recognize surrogate pairs. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-30-armbru@redhat.com> --- qobject/json-parser.c | 60 +++++++++++++++++++++++++++++++++------------------ tests/check-qjson.c | 3 +-- 2 files changed, 40 insertions(+), 23 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index e49da192fe..73e6ad7458 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -64,16 +64,27 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, error_setg(&ctxt->err, "JSON parse error, %s", message); } -static int hex2decimal(char ch) +static int cvt4hex(const char *s) { - if (ch >= '0' && ch <= '9') { - return (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - return 10 + (ch - 'a'); - } else if (ch >= 'A' && ch <= 'F') { - return 10 + (ch - 'A'); + int cp, i; + + cp = 0; + for (i = 0; i < 4; i++) { + if (!qemu_isxdigit(s[i])) { + return -1; + } + cp <<= 4; + if (s[i] >= '0' && s[i] <= '9') { + cp |= s[i] - '0'; + } else if (s[i] >= 'a' && s[i] <= 'f') { + cp |= 10 + s[i] - 'a'; + } else if (s[i] >= 'A' && s[i] <= 'F') { + cp |= 10 + s[i] - 'A'; + } else { + return -1; + } } - abort(); + return cp; } /** @@ -115,7 +126,8 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) const char *ptr = token->str; QString *str; char quote; - int cp, i; + const char *beg; + int cp, trailing; char *end; ssize_t len; char utf8_buf[5]; @@ -127,7 +139,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) while (*ptr != quote) { assert(*ptr); if (*ptr == '\\') { - ptr++; + beg = ptr++; switch (*ptr++) { case '"': qstring_append_chr(str, '"'); @@ -157,22 +169,28 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) qstring_append_chr(str, '\t'); break; case 'u': - cp = 0; - for (i = 0; i < 4; i++) { - if (!qemu_isxdigit(*ptr)) { - parse_error(ctxt, token, - "invalid hex escape sequence in string"); - goto out; + cp = cvt4hex(ptr); + ptr += 4; + + /* handle surrogate pairs */ + if (cp >= 0xD800 && cp <= 0xDBFF + && ptr[0] == '\\' && ptr[1] == 'u') { + /* leading surrogate followed by \u */ + cp = 0x10000 + ((cp & 0x3FF) << 10); + trailing = cvt4hex(ptr + 2); + if (trailing >= 0xDC00 && trailing <= 0xDFFF) { + /* followed by trailing surrogate */ + cp |= trailing & 0x3FF; + ptr += 6; + } else { + cp = -1; /* invalid */ } - cp <<= 4; - cp |= hex2decimal(*ptr); - ptr++; } if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) { parse_error(ctxt, token, - "\\u%.4s is not a valid Unicode character", - ptr - 3); + "%.*s is not a valid Unicode character", + (int)(ptr - beg), beg); goto out; } qstring_append(str, utf8_buf); diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 4abb5847ad..343f8af36a 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -63,8 +63,7 @@ static void escaped_string(void) { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" }, { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" }, { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */ - /* bug: want \xF0\x9D\x84\x9E */ - NULL }, + "quadruple byte utf-8 \xF0\x9D\x84\x9E" }, { "\\", NULL }, { "\\z", NULL }, { "\\ux", NULL }, -- cgit 1.4.1 From e8b19d7d7300366a1dd85273512657bbeab564ab Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Thu, 23 Aug 2018 18:39:59 +0200 Subject: json-parser: simplify and avoid JSONParserContext allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parser_context_new/free() are only used from json_parser_parse(). We can fold the code there and avoid an allocation altogether. Signed-off-by: Marc-André Lureau Message-Id: <20180719184111.5129-9-marcandre.lureau@redhat.com> Reviewed-by: Markus Armbruster Message-Id: <20180823164025.12553-33-armbru@redhat.com> --- qobject/json-parser.c | 41 +++++++++-------------------------------- 1 file changed, 9 insertions(+), 32 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 73e6ad7458..7bfa08200c 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -237,33 +237,6 @@ static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) return g_queue_peek_head(ctxt->buf); } -static JSONParserContext *parser_context_new(GQueue *tokens) -{ - JSONParserContext *ctxt; - - if (!tokens) { - return NULL; - } - - ctxt = g_malloc0(sizeof(JSONParserContext)); - ctxt->buf = tokens; - - return ctxt; -} - -/* to support error propagation, ctxt->err must be freed separately */ -static void parser_context_free(JSONParserContext *ctxt) -{ - if (ctxt) { - while (!g_queue_is_empty(ctxt->buf)) { - parser_context_pop_token(ctxt); - } - g_free(ctxt->current); - g_queue_free(ctxt->buf); - g_free(ctxt); - } -} - /** * Parsing rules */ @@ -575,18 +548,22 @@ QObject *json_parser_parse(GQueue *tokens, va_list *ap) QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp) { - JSONParserContext *ctxt = parser_context_new(tokens); + JSONParserContext ctxt = { .buf = tokens }; QObject *result; - if (!ctxt) { + if (!tokens) { return NULL; } - result = parse_value(ctxt, ap); + result = parse_value(&ctxt, ap); - error_propagate(errp, ctxt->err); + error_propagate(errp, ctxt.err); - parser_context_free(ctxt); + while (!g_queue_is_empty(ctxt.buf)) { + parser_context_pop_token(&ctxt); + } + g_free(ctxt.current); + g_queue_free(ctxt.buf); return result; } -- cgit 1.4.1 From 62815d85aed71eff7b6c3a524705180fb04f5d30 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:01 +0200 Subject: json: Redesign the callback to consume JSON values The classical way to structure parser and lexer is to have the client call the parser to get an abstract syntax tree, the parser call the lexer to get the next token, and the lexer call some function to get input characters. Another way to structure them would be to have the client feed characters to the lexer, the lexer feed tokens to the parser, and the parser feed abstract syntax trees to some callback provided by the client. This way is more easily integrated into an event loop that dispatches input characters as they arrive. Our JSON parser is kind of between the two. The lexer feeds tokens to a "streamer" instead of a real parser. The streamer accumulates tokens until it got the sequence of tokens that comprise a single JSON value (it counts curly braces and square brackets to decide). It feeds those token sequences to a callback provided by the client. The callback passes each token sequence to the parser, and gets back an abstract syntax tree. I figure it was done that way to make a straightforward recursive descent parser possible. "Get next token" becomes "pop the first token off the token sequence". Drawback: we need to store a complete token sequence. Each token eats 13 + input characters + malloc overhead bytes. Observations: 1. This is not the only way to use recursive descent. If we replaced "get next token" by a coroutine yield, we could do without a streamer. 2. The lexer reports errors by passing a JSON_ERROR token to the streamer. This communicates the offending input characters and their location, but no more. 3. The streamer reports errors by passing a null token sequence to the callback. The (already poor) lexical error information is thrown away. 4. Having the callback receive a token sequence duplicates the code to convert token sequence to abstract syntax tree in every callback. 5. Known bug: the streamer silently drops incomplete token sequences. This commit rectifies 4. by lifting the call of the parser from the callbacks into the streamer. Later commits will address 3. and 5. The lifting removes a bug from qjson.c's parse_json(): it passed a pointer to a non-null Error * in certain cases, as demonstrated by check-qjson.c. json_parser_parse() is now unused. It's a stupid wrapper around json_parser_parse_err(). Drop it, and rename json_parser_parse_err() to json_parser_parse(). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-35-armbru@redhat.com> --- include/qapi/qmp/json-parser.h | 3 +-- include/qapi/qmp/json-streamer.h | 8 ++++++-- monitor.c | 18 ++++++++---------- qapi/qmp-dispatch.c | 1 - qga/main.c | 12 +++--------- qobject/json-parser.c | 7 +------ qobject/json-streamer.c | 19 +++++++++++-------- qobject/qjson.c | 14 +++++--------- tests/check-qjson.c | 1 - tests/libqtest.c | 10 ++++------ 10 files changed, 39 insertions(+), 54 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h index 102f5c0068..a34209db7a 100644 --- a/include/qapi/qmp/json-parser.h +++ b/include/qapi/qmp/json-parser.h @@ -16,7 +16,6 @@ #include "qemu-common.h" -QObject *json_parser_parse(GQueue *tokens, va_list *ap); -QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp); +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); #endif diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h index 7922e185a5..e162fd01da 100644 --- a/include/qapi/qmp/json-streamer.h +++ b/include/qapi/qmp/json-streamer.h @@ -25,7 +25,9 @@ typedef struct JSONToken { typedef struct JSONMessageParser { - void (*emit)(struct JSONMessageParser *parser, GQueue *tokens); + void (*emit)(void *opaque, QObject *json, Error *err); + void *opaque; + va_list *ap; JSONLexer lexer; int brace_count; int bracket_count; @@ -37,7 +39,9 @@ void json_message_process_token(JSONLexer *lexer, GString *input, JSONTokenType type, int x, int y); void json_message_parser_init(JSONMessageParser *parser, - void (*func)(JSONMessageParser *, GQueue *)); + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap); void json_message_parser_feed(JSONMessageParser *parser, const char *buffer, size_t size); diff --git a/monitor.c b/monitor.c index 94f673511b..08f799a7bb 100644 --- a/monitor.c +++ b/monitor.c @@ -59,7 +59,6 @@ #include "qapi/qmp/qstring.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/json-streamer.h" -#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qlist.h" #include "qom/object_interfaces.h" #include "trace-root.h" @@ -4256,18 +4255,15 @@ static void monitor_qmp_bh_dispatcher(void *data) #define QMP_REQ_QUEUE_LEN_MAX (8) -static void handle_qmp_command(JSONMessageParser *parser, GQueue *tokens) +static void handle_qmp_command(void *opaque, QObject *req, Error *err) { - QObject *req, *id = NULL; + Monitor *mon = opaque; + QObject *id = NULL; QDict *qdict; - MonitorQMP *mon_qmp = container_of(parser, MonitorQMP, parser); - Monitor *mon = container_of(mon_qmp, Monitor, qmp); - Error *err = NULL; QMPRequest *req_obj; - req = json_parser_parse_err(tokens, NULL, &err); if (!req && !err) { - /* json_parser_parse_err() sucks: can fail without setting @err */ + /* json_parser_parse() sucks: can fail without setting @err */ error_setg(&err, QERR_JSON_PARSING); } @@ -4465,7 +4461,8 @@ static void monitor_qmp_event(void *opaque, int event) monitor_qmp_response_flush(mon); monitor_qmp_cleanup_queues(mon); json_message_parser_destroy(&mon->qmp.parser); - json_message_parser_init(&mon->qmp.parser, handle_qmp_command); + json_message_parser_init(&mon->qmp.parser, handle_qmp_command, + mon, NULL); mon_refcount--; monitor_fdsets_cleanup(); break; @@ -4683,7 +4680,8 @@ void monitor_init(Chardev *chr, int flags) if (monitor_is_qmp(mon)) { qemu_chr_fe_set_echo(&mon->chr, true); - json_message_parser_init(&mon->qmp.parser, handle_qmp_command); + json_message_parser_init(&mon->qmp.parser, handle_qmp_command, + mon, NULL); if (mon->use_io_thread) { /* * Make sure the old iowatch is gone. It's possible when diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index 6f2d466596..d8da1a62de 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qmp/dispatch.h" -#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qbool.h" diff --git a/qga/main.c b/qga/main.c index 87372d40ef..2fc49d00d8 100644 --- a/qga/main.c +++ b/qga/main.c @@ -19,7 +19,6 @@ #include #endif #include "qapi/qmp/json-streamer.h" -#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qstring.h" @@ -597,18 +596,13 @@ static void process_command(GAState *s, QDict *req) } /* handle requests/control events coming in over the channel */ -static void process_event(JSONMessageParser *parser, GQueue *tokens) +static void process_event(void *opaque, QObject *obj, Error *err) { - GAState *s = container_of(parser, GAState, parser); - QObject *obj; + GAState *s = opaque; QDict *req, *rsp; - Error *err = NULL; int ret; - g_assert(s && parser); - g_debug("process_event: called"); - obj = json_parser_parse_err(tokens, NULL, &err); if (err) { goto err; } @@ -1320,7 +1314,7 @@ static int run_agent(GAState *s, GAConfig *config, int socket_activation) s->command_state = ga_command_state_new(); ga_command_state_init(s, s->command_state); ga_command_state_init_all(s->command_state); - json_message_parser_init(&s->parser, process_event); + json_message_parser_init(&s->parser, process_event, s, NULL); #ifndef _WIN32 if (!register_signal_handlers()) { diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 7bfa08200c..95fa348e21 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -541,12 +541,7 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) } } -QObject *json_parser_parse(GQueue *tokens, va_list *ap) -{ - return json_parser_parse_err(tokens, ap, NULL); -} - -QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp) +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) { JSONParserContext ctxt = { .buf = tokens }; QObject *result; diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index 9f57ebf2bd..7fd0ff8756 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qapi/qmp/json-lexer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/json-streamer.h" #define MAX_TOKEN_SIZE (64ULL << 20) @@ -38,8 +39,9 @@ void json_message_process_token(JSONLexer *lexer, GString *input, JSONTokenType type, int x, int y) { JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer); + Error *err = NULL; JSONToken *token; - GQueue *tokens; + QObject *json; switch (type) { case JSON_LCURLY: @@ -97,19 +99,20 @@ out_emit: /* send current list of tokens to parser and reset tokenizer */ parser->brace_count = 0; parser->bracket_count = 0; - /* parser->emit takes ownership of parser->tokens. Remove our own - * reference to parser->tokens before handing it out to parser->emit. - */ - tokens = parser->tokens; + json = json_parser_parse(parser->tokens, parser->ap, &err); parser->tokens = g_queue_new(); - parser->emit(parser, tokens); parser->token_size = 0; + parser->emit(parser->opaque, json, err); } void json_message_parser_init(JSONMessageParser *parser, - void (*func)(JSONMessageParser *, GQueue *)) + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap) { - parser->emit = func; + parser->emit = emit; + parser->opaque = opaque; + parser->ap = ap; parser->brace_count = 0; parser->bracket_count = 0; parser->tokens = g_queue_new(); diff --git a/qobject/qjson.c b/qobject/qjson.c index ab4040f235..7395556069 100644 --- a/qobject/qjson.c +++ b/qobject/qjson.c @@ -13,8 +13,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-parser.h" #include "qapi/qmp/json-streamer.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qbool.h" @@ -27,16 +25,16 @@ typedef struct JSONParsingState { JSONMessageParser parser; - va_list *ap; QObject *result; Error *err; } JSONParsingState; -static void parse_json(JSONMessageParser *parser, GQueue *tokens) +static void consume_json(void *opaque, QObject *json, Error *err) { - JSONParsingState *s = container_of(parser, JSONParsingState, parser); + JSONParsingState *s = opaque; - s->result = json_parser_parse_err(tokens, s->ap, &s->err); + s->result = json; + error_propagate(&s->err, err); } /* @@ -54,9 +52,7 @@ static QObject *qobject_from_jsonv(const char *string, va_list *ap, { JSONParsingState state = {}; - state.ap = ap; - - json_message_parser_init(&state.parser, parse_json); + json_message_parser_init(&state.parser, consume_json, &state, ap); json_message_parser_feed(&state.parser, string, strlen(string)); json_message_parser_flush(&state.parser); json_message_parser_destroy(&state.parser); diff --git a/tests/check-qjson.c b/tests/check-qjson.c index defc21fa04..604886a1a2 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1438,7 +1438,6 @@ static void multiple_values(void) qobject_unref(obj); /* BUG simultaneously succeeds and fails */ - /* BUG calls json_parser_parse() with errp pointing to non-null */ obj = qobject_from_json("} true", &err); g_assert(qbool_get_bool(qobject_to(QBool, obj))); error_free_or_abort(&err); diff --git a/tests/libqtest.c b/tests/libqtest.c index af2a24e796..1f3b0cb1b1 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -21,9 +21,9 @@ #include #include "libqtest.h" +#include "qemu-common.h" #include "qemu/cutils.h" #include "qapi/error.h" -#include "qapi/qmp/json-parser.h" #include "qapi/qmp/json-streamer.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" @@ -446,12 +446,10 @@ typedef struct { QDict *response; } QMPResponseParser; -static void qmp_response(JSONMessageParser *parser, GQueue *tokens) +static void qmp_response(void *opaque, QObject *obj, Error *err) { - QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser); - QObject *obj; + QMPResponseParser *qmp = opaque; - obj = json_parser_parse(tokens, NULL); if (!obj) { fprintf(stderr, "QMP JSON response parsing failed\n"); abort(); @@ -468,7 +466,7 @@ QDict *qmp_fd_receive(int fd) bool log = getenv("QTEST_LOG") != NULL; qmp.response = NULL; - json_message_parser_init(&qmp.parser, qmp_response); + json_message_parser_init(&qmp.parser, qmp_response, &qmp, NULL); while (!qmp.response) { ssize_t len; char c; -- cgit 1.4.1 From ff281a272f67a07d8ea29ca0350c4a3e0d3de73c Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:02 +0200 Subject: json: Don't pass null @tokens to json_parser_parse() json_parser_parse() normally returns the QObject on success. Except it returns null when its @tokens argument is null. Its only caller json_message_process_token() passes null @tokens when emitting a lexical error. The call is a rather opaque way to say json = NULL then. Simplify matters by lifting the assignment to json out of the emit path: initialize json to null, set it to the value of json_parser_parse() when there's no lexical error. Drop the special case from json_parser_parse(). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-36-armbru@redhat.com> --- qobject/json-parser.c | 4 ---- qobject/json-streamer.c | 25 ++++++++++++------------- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 95fa348e21..06aff19a5d 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -546,10 +546,6 @@ QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) JSONParserContext ctxt = { .buf = tokens }; QObject *result; - if (!tokens) { - return NULL; - } - result = parse_value(&ctxt, ap); error_propagate(errp, ctxt.err); diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index 7fd0ff8756..0c33186e8e 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -39,9 +39,9 @@ void json_message_process_token(JSONLexer *lexer, GString *input, JSONTokenType type, int x, int y) { JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer); + QObject *json = NULL; Error *err = NULL; JSONToken *token; - QObject *json; switch (type) { case JSON_LCURLY: @@ -72,34 +72,33 @@ void json_message_process_token(JSONLexer *lexer, GString *input, g_queue_push_tail(parser->tokens, token); if (type == JSON_ERROR) { - goto out_emit_bad; - } else if (parser->brace_count < 0 || + goto out_emit; + } + + if (parser->brace_count < 0 || parser->bracket_count < 0 || (parser->brace_count == 0 && parser->bracket_count == 0)) { + json = json_parser_parse(parser->tokens, parser->ap, &err); + parser->tokens = NULL; goto out_emit; - } else if (parser->token_size > MAX_TOKEN_SIZE || + } + + if (parser->token_size > MAX_TOKEN_SIZE || g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT || parser->bracket_count + parser->brace_count > MAX_NESTING) { /* Security consideration, we limit total memory allocated per object * and the maximum recursion depth that a message can force. */ - goto out_emit_bad; + goto out_emit; } return; -out_emit_bad: - /* - * Clear out token list and tell the parser to emit an error - * indication by passing it a NULL list - */ - json_message_free_tokens(parser); out_emit: - /* send current list of tokens to parser and reset tokenizer */ parser->brace_count = 0; parser->bracket_count = 0; - json = json_parser_parse(parser->tokens, parser->ap, &err); + json_message_free_tokens(parser); parser->tokens = g_queue_new(); parser->token_size = 0; parser->emit(parser->opaque, json, err); -- cgit 1.4.1 From 61030280ca2d67bd63cb068250aee55849cd38ca Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:04 +0200 Subject: json: Rename token JSON_ESCAPE & friends to JSON_INTERP The JSON parser optionally supports interpolation. The code calls it "escape". Awkward, because it uses the same term for escape sequences within strings. The latter usage is consistent with RFC 8259 "The JavaScript Object Notation (JSON) Data Interchange Format" and ISO C. Call the former "interpolation" instead. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-38-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 2 +- qobject/json-lexer.c | 64 +++++++++++++++++++++---------------------- qobject/json-parser.c | 8 +++--- 3 files changed, 37 insertions(+), 37 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index 44bcf2ca64..8bce6ef676 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -27,7 +27,7 @@ typedef enum json_token_type { JSON_FLOAT, JSON_KEYWORD, JSON_STRING, - JSON_ESCAPE, + JSON_INTERP, JSON_SKIP, JSON_ERROR, } JSONTokenType; diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 17272a3874..5436809be6 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -115,12 +115,12 @@ enum json_lexer_state { IN_NONZERO_NUMBER, IN_NEG_NONZERO_NUMBER, IN_KEYWORD, - IN_ESCAPE, - IN_ESCAPE_L, - IN_ESCAPE_LL, - IN_ESCAPE_I, - IN_ESCAPE_I6, - IN_ESCAPE_I64, + IN_INTERP, + IN_INTERP_L, + IN_INTERP_LL, + IN_INTERP_I, + IN_INTERP_I6, + IN_INTERP_I64, IN_WHITESPACE, IN_START, }; @@ -221,40 +221,40 @@ static const uint8_t json_lexer[][256] = { ['\n'] = IN_WHITESPACE, }, - /* escape */ - [IN_ESCAPE_LL] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, + /* interpolation */ + [IN_INTERP_LL] = { + ['d'] = JSON_INTERP, + ['u'] = JSON_INTERP, }, - [IN_ESCAPE_L] = { - ['d'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_LL, - ['u'] = JSON_ESCAPE, + [IN_INTERP_L] = { + ['d'] = JSON_INTERP, + ['l'] = IN_INTERP_LL, + ['u'] = JSON_INTERP, }, - [IN_ESCAPE_I64] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, + [IN_INTERP_I64] = { + ['d'] = JSON_INTERP, + ['u'] = JSON_INTERP, }, - [IN_ESCAPE_I6] = { - ['4'] = IN_ESCAPE_I64, + [IN_INTERP_I6] = { + ['4'] = IN_INTERP_I64, }, - [IN_ESCAPE_I] = { - ['6'] = IN_ESCAPE_I6, + [IN_INTERP_I] = { + ['6'] = IN_INTERP_I6, }, - [IN_ESCAPE] = { - ['d'] = JSON_ESCAPE, - ['i'] = JSON_ESCAPE, - ['p'] = JSON_ESCAPE, - ['s'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - ['f'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_L, - ['I'] = IN_ESCAPE_I, + [IN_INTERP] = { + ['d'] = JSON_INTERP, + ['i'] = JSON_INTERP, + ['p'] = JSON_INTERP, + ['s'] = JSON_INTERP, + ['u'] = JSON_INTERP, + ['f'] = JSON_INTERP, + ['l'] = IN_INTERP_L, + ['I'] = IN_INTERP_I, }, /* top level rule */ @@ -271,7 +271,7 @@ static const uint8_t json_lexer[][256] = { [','] = JSON_COMMA, [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, - ['%'] = IN_ESCAPE, + ['%'] = IN_INTERP, [' '] = IN_WHITESPACE, ['\t'] = IN_WHITESPACE, ['\r'] = IN_WHITESPACE, @@ -311,7 +311,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) case JSON_RSQUARE: case JSON_COLON: case JSON_COMMA: - case JSON_ESCAPE: + case JSON_INTERP: case JSON_INTEGER: case JSON_FLOAT: case JSON_KEYWORD: diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 06aff19a5d..864cb578d8 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -423,7 +423,7 @@ static QObject *parse_keyword(JSONParserContext *ctxt) return NULL; } -static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) { JSONToken *token; @@ -432,7 +432,7 @@ static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) } token = parser_context_pop_token(ctxt); - assert(token && token->type == JSON_ESCAPE); + assert(token && token->type == JSON_INTERP); if (!strcmp(token->str, "%p")) { return va_arg(*ap, QObject *); @@ -527,8 +527,8 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) return parse_object(ctxt, ap); case JSON_LSQUARE: return parse_array(ctxt, ap); - case JSON_ESCAPE: - return parse_escape(ctxt, ap); + case JSON_INTERP: + return parse_interpolation(ctxt, ap); case JSON_INTEGER: case JSON_FLOAT: case JSON_STRING: -- cgit 1.4.1 From 2cbd15aa6f4d4694376dd0d231d56e572ac870c1 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:05 +0200 Subject: json: Treat unwanted interpolation as lexical error The JSON parser optionally supports interpolation. The lexer recognizes interpolation tokens unconditionally. The parser rejects them when interpolation is disabled, in parse_interpolation(). However, it neglects to set an error then, which can make json_parser_parse() fail without setting an error. Move the check for unwanted interpolation from the parser's parse_interpolation() into the lexer's finite state machine. When interpolation is disabled, '%' is now handled like any other unexpected character. The next commit will improve how such lexical errors are handled. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-39-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 4 ++-- qobject/json-lexer.c | 30 ++++++++++++++++++------------ qobject/json-parser.c | 4 ---- qobject/json-streamer.c | 2 +- tests/qmp-test.c | 4 ++++ 5 files changed, 25 insertions(+), 19 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index 8bce6ef676..afa84cb910 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -33,12 +33,12 @@ typedef enum json_token_type { } JSONTokenType; typedef struct JSONLexer { - int state; + int start_state, state; GString *token; int x, y; } JSONLexer; -void json_lexer_init(JSONLexer *lexer); +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 5436809be6..96fe13621d 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -92,7 +92,7 @@ * Like double-quoted strings, except they're delimited by %x27 * (apostrophe) instead of %x22 (quotation mark), and can't contain * unescaped apostrophe, but can contain unescaped quotation mark. - * - Interpolation: + * - Interpolation, if enabled: * interpolation = %((l|ll|I64)[du]|[ipsf]) * * Note: @@ -123,9 +123,11 @@ enum json_lexer_state { IN_INTERP_I64, IN_WHITESPACE, IN_START, + IN_START_INTERP, /* must be IN_START + 1 */ }; -QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); +QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP); +QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1); #define TERMINAL(state) [0 ... 0x7F] = (state) @@ -257,8 +259,12 @@ static const uint8_t json_lexer[][256] = { ['I'] = IN_INTERP_I, }, - /* top level rule */ - [IN_START] = { + /* + * Two start states: + * - IN_START recognizes JSON tokens with our string extensions + * - IN_START_INTERP additionally recognizes interpolation. + */ + [IN_START ... IN_START_INTERP] = { ['"'] = IN_DQ_STRING, ['\''] = IN_SQ_STRING, ['0'] = IN_ZERO, @@ -271,17 +277,18 @@ static const uint8_t json_lexer[][256] = { [','] = JSON_COMMA, [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, - ['%'] = IN_INTERP, [' '] = IN_WHITESPACE, ['\t'] = IN_WHITESPACE, ['\r'] = IN_WHITESPACE, ['\n'] = IN_WHITESPACE, }, + [IN_START_INTERP]['%'] = IN_INTERP, }; -void json_lexer_init(JSONLexer *lexer) +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation) { - lexer->state = IN_START; + lexer->start_state = lexer->state = enable_interpolation + ? IN_START_INTERP : IN_START; lexer->token = g_string_sized_new(3); lexer->x = lexer->y = 0; } @@ -321,7 +328,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) /* fall through */ case JSON_SKIP: g_string_truncate(lexer->token, 0); - new_state = IN_START; + new_state = lexer->start_state; break; case IN_ERROR: /* XXX: To avoid having previous bad input leaving the parser in an @@ -340,8 +347,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) json_message_process_token(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - new_state = IN_START; - lexer->state = new_state; + lexer->state = lexer->start_state; return; default: break; @@ -356,7 +362,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) json_message_process_token(lexer, lexer->token, lexer->state, lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - lexer->state = IN_START; + lexer->state = lexer->start_state; } } @@ -371,7 +377,7 @@ void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) void json_lexer_flush(JSONLexer *lexer) { - if (lexer->state != IN_START) { + if (lexer->state != lexer->start_state) { json_lexer_feed_char(lexer, 0, true); } } diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 864cb578d8..2855eaaeca 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -427,10 +427,6 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) { JSONToken *token; - if (ap == NULL) { - return NULL; - } - token = parser_context_pop_token(ctxt); assert(token && token->type == JSON_INTERP); diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index fa595a8761..a373e0114a 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -115,7 +115,7 @@ void json_message_parser_init(JSONMessageParser *parser, parser->tokens = g_queue_new(); parser->token_size = 0; - json_lexer_init(&parser->lexer); + json_lexer_init(&parser->lexer, !!ap); } void json_message_parser_feed(JSONMessageParser *parser, diff --git a/tests/qmp-test.c b/tests/qmp-test.c index 7b3ba17c4a..4ae2245484 100644 --- a/tests/qmp-test.c +++ b/tests/qmp-test.c @@ -94,6 +94,10 @@ static void test_malformed(QTestState *qts) /* lexical error: interpolation */ qtest_qmp_send_raw(qts, "%%p\n"); + /* two errors, one for "%", one for "p" */ + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); resp = qtest_qmp_receive(qts); g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); qobject_unref(resp); -- cgit 1.4.1 From f7617d45d4652ae10d38bd0c917d7488d155cccb Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:07 +0200 Subject: json: Leave rejecting invalid interpolation to parser Both lexer and parser reject invalid interpolation specifications. The parser's check is useless. The lexer ends the token right after the first bad character. This tends to lead to suboptimal error reporting. For instance, input [ %04d ] produces the tokens JSON_LSQUARE [ JSON_ERROR %0 JSON_INTEGER 4 JSON_KEYWORD d JSON_RSQUARE ] The parser then yields an error, an object and two more errors: error: Invalid JSON syntax object: 4 error: JSON parse error, invalid keyword error: JSON parse error, expecting value Dumb down the lexer to accept [A-Za-z0-9]*. The parser's check is now used. Emit a proper error there. The lexer now produces JSON_LSQUARE [ JSON_INTERP %04d JSON_RSQUARE ] and the parser reports just JSON parse error, invalid interpolation '%04d' Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-41-armbru@redhat.com> --- qobject/json-lexer.c | 44 ++++++-------------------------------------- qobject/json-parser.c | 1 + tests/check-qjson.c | 3 ++- 3 files changed, 9 insertions(+), 39 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 7c31c2c8ff..f1a4b5a430 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -93,7 +93,8 @@ * (apostrophe) instead of %x22 (quotation mark), and can't contain * unescaped apostrophe, but can contain unescaped quotation mark. * - Interpolation, if enabled: - * interpolation = %((l|ll|I64)[du]|[ipsf]) + * The lexer accepts %[A-Za-z0-9]*, and leaves rejecting invalid + * ones to the parser. * * Note: * - Input must be encoded in modified UTF-8. @@ -116,11 +117,6 @@ enum json_lexer_state { IN_NEG_NONZERO_NUMBER, IN_KEYWORD, IN_INTERP, - IN_INTERP_L, - IN_INTERP_LL, - IN_INTERP_I, - IN_INTERP_I6, - IN_INTERP_I64, IN_WHITESPACE, IN_START, IN_START_INTERP, /* must be IN_START + 1 */ @@ -224,39 +220,11 @@ static const uint8_t json_lexer[][256] = { }, /* interpolation */ - [IN_INTERP_LL] = { - ['d'] = JSON_INTERP, - ['u'] = JSON_INTERP, - }, - - [IN_INTERP_L] = { - ['d'] = JSON_INTERP, - ['l'] = IN_INTERP_LL, - ['u'] = JSON_INTERP, - }, - - [IN_INTERP_I64] = { - ['d'] = JSON_INTERP, - ['u'] = JSON_INTERP, - }, - - [IN_INTERP_I6] = { - ['4'] = IN_INTERP_I64, - }, - - [IN_INTERP_I] = { - ['6'] = IN_INTERP_I6, - }, - [IN_INTERP] = { - ['d'] = JSON_INTERP, - ['i'] = JSON_INTERP, - ['p'] = JSON_INTERP, - ['s'] = JSON_INTERP, - ['u'] = JSON_INTERP, - ['f'] = JSON_INTERP, - ['l'] = IN_INTERP_L, - ['I'] = IN_INTERP_I, + TERMINAL(JSON_INTERP), + ['A' ... 'Z'] = IN_INTERP, + ['a' ... 'z'] = IN_INTERP, + ['0' ... '9'] = IN_INTERP, }, /* diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 2855eaaeca..e61cee9e8a 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -453,6 +453,7 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) } else if (!strcmp(token->str, "%f")) { return QOBJECT(qnum_from_double(va_arg(*ap, double))); } + parse_error(ctxt, token, "invalid interpolation '%s'", token->str); return NULL; } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index d6fda0786f..83f8a0e6e3 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1021,7 +1021,8 @@ static void interpolation_unknown(void) } g_test_trap_subprocess(NULL, 0, 0); g_test_trap_assert_failed(); - g_test_trap_assert_stderr("*Unexpected error*stray '%x'*"); + g_test_trap_assert_stderr("*Unexpected error*" + "invalid interpolation '%x'*"); } static void interpolation_string(void) -- cgit 1.4.1 From 53a0d616fecab09870411573afc58fd24ffb8648 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:08 +0200 Subject: json: Replace %I64d, %I64u by %PRId64, %PRIu64 Support for %I64d got added in commit 2c0d4b36e7f "json: fix PRId64 on Win32". We had to hard-code I64d because we used the lexer's finite state machine to check interpolations. No more, so clean this up. Additional conversion specifications would be easy enough to implement when needed. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-42-armbru@redhat.com> --- qobject/json-parser.c | 10 ++++++---- tests/check-qjson.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index e61cee9e8a..27e873ad3b 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -438,16 +438,18 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) return QOBJECT(qnum_from_int(va_arg(*ap, int))); } else if (!strcmp(token->str, "%ld")) { return QOBJECT(qnum_from_int(va_arg(*ap, long))); - } else if (!strcmp(token->str, "%lld") || - !strcmp(token->str, "%I64d")) { + } else if (!strcmp(token->str, "%lld")) { return QOBJECT(qnum_from_int(va_arg(*ap, long long))); + } else if (!strcmp(token->str, "%" PRId64)) { + return QOBJECT(qnum_from_int(va_arg(*ap, int64_t))); } else if (!strcmp(token->str, "%u")) { return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int))); } else if (!strcmp(token->str, "%lu")) { return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long))); - } else if (!strcmp(token->str, "%llu") || - !strcmp(token->str, "%I64u")) { + } else if (!strcmp(token->str, "%llu")) { return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long))); + } else if (!strcmp(token->str, "%" PRIu64)) { + return QOBJECT(qnum_from_uint(va_arg(*ap, uint64_t))); } else if (!strcmp(token->str, "%s")) { return QOBJECT(qstring_from_str(va_arg(*ap, const char *))); } else if (!strcmp(token->str, "%f")) { diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 83f8a0e6e3..f344ad921c 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -945,9 +945,11 @@ static void keyword_literal(void) static void interpolation_valid(void) { long long value_lld = 0x123456789abcdefLL; + int64_t value_d64 = value_lld; long value_ld = (long)value_lld; int value_d = (int)value_lld; unsigned long long value_llu = 0xfedcba9876543210ULL; + uint64_t value_u64 = value_llu; unsigned long value_lu = (unsigned long)value_llu; unsigned value_u = (unsigned)value_llu; double value_f = 2.323423423; @@ -985,6 +987,10 @@ static void interpolation_valid(void) g_assert_cmpint(qnum_get_int(qnum), ==, value_lld); qobject_unref(qnum); + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%" PRId64, value_d64)); + g_assert_cmpint(qnum_get_int(qnum), ==, value_lld); + qobject_unref(qnum); + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%u", value_u)); g_assert_cmpuint(qnum_get_uint(qnum), ==, value_u); qobject_unref(qnum); @@ -997,6 +1003,10 @@ static void interpolation_valid(void) g_assert_cmpuint(qnum_get_uint(qnum), ==, value_llu); qobject_unref(qnum); + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%" PRIu64, value_u64)); + g_assert_cmpuint(qnum_get_uint(qnum), ==, value_llu); + qobject_unref(qnum); + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%f", value_f)); g_assert(qnum_get_double(qnum) == value_f); qobject_unref(qnum); -- cgit 1.4.1 From e06d008ac833ac220b920f4d8858e6f62f7a59f9 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:11 +0200 Subject: json: Fix latent parser aborts at end of input json-parser.c carefully reports end of input like this: token = parser_context_pop_token(ctxt); if (token == NULL) { parse_error(ctxt, NULL, "premature EOI"); goto out; } Except parser_context_pop_token() can't return null, it fails its assertion instead. Same for parser_context_peek_token(). Broken in commit 65c0f1e9558, and faithfully preserved in commit 95385fe9ace. Only a latent bug, because the streamer throws away any input that could trigger it. Drop the assertions, so we can fix the streamer in the next commit. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-45-armbru@redhat.com> --- qobject/json-parser.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 27e873ad3b..e3ee2a273a 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -226,14 +226,12 @@ out: static JSONToken *parser_context_pop_token(JSONParserContext *ctxt) { g_free(ctxt->current); - assert(!g_queue_is_empty(ctxt->buf)); ctxt->current = g_queue_pop_head(ctxt->buf); return ctxt->current; } static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) { - assert(!g_queue_is_empty(ctxt->buf)); return g_queue_peek_head(ctxt->buf); } -- cgit 1.4.1 From 5d50113cf675ec96337ac6eaf81d83fbf69273bc Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:13 +0200 Subject: json: Assert json_parser_parse() consumes all tokens on success Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-47-armbru@redhat.com> --- qobject/json-parser.c | 1 + 1 file changed, 1 insertion(+) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index e3ee2a273a..685e9dac24 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -544,6 +544,7 @@ QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) QObject *result; result = parse_value(&ctxt, ap); + assert(ctxt.err || g_queue_is_empty(ctxt.buf)); error_propagate(errp, ctxt.err); -- cgit 1.4.1 From a2731e08ee8633fcdc2af944b8f8f315678f7669 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:17 +0200 Subject: json: Unbox tokens queue in JSONMessageParser Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-51-armbru@redhat.com> --- include/qapi/qmp/json-streamer.h | 2 +- qobject/json-parser.c | 1 - qobject/json-streamer.c | 30 +++++++++++------------------- 3 files changed, 12 insertions(+), 21 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h index e162fd01da..d1d7fe2595 100644 --- a/include/qapi/qmp/json-streamer.h +++ b/include/qapi/qmp/json-streamer.h @@ -31,7 +31,7 @@ typedef struct JSONMessageParser JSONLexer lexer; int brace_count; int bracket_count; - GQueue *tokens; + GQueue tokens; uint64_t token_size; } JSONMessageParser; diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 685e9dac24..e9a9f937f3 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -552,7 +552,6 @@ QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) parser_context_pop_token(&ctxt); } g_free(ctxt.current); - g_queue_free(ctxt.buf); return result; } diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index 954bf9d468..9210281a65 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -22,17 +22,12 @@ #define MAX_TOKEN_COUNT (2ULL << 20) #define MAX_NESTING (1 << 10) -static void json_message_free_token(void *token, void *opaque) -{ - g_free(token); -} - static void json_message_free_tokens(JSONMessageParser *parser) { - if (parser->tokens) { - g_queue_foreach(parser->tokens, json_message_free_token, NULL); - g_queue_free(parser->tokens); - parser->tokens = NULL; + JSONToken *token; + + while ((token = g_queue_pop_head(&parser->tokens))) { + g_free(token); } } @@ -61,11 +56,10 @@ void json_message_process_token(JSONLexer *lexer, GString *input, error_setg(&err, "JSON parse error, stray '%s'", input->str); goto out_emit; case JSON_END_OF_INPUT: - if (g_queue_is_empty(parser->tokens)) { + if (g_queue_is_empty(&parser->tokens)) { return; } - json = json_parser_parse(parser->tokens, parser->ap, &err); - parser->tokens = NULL; + json = json_parser_parse(&parser->tokens, parser->ap, &err); goto out_emit; default: break; @@ -79,7 +73,7 @@ void json_message_process_token(JSONLexer *lexer, GString *input, error_setg(&err, "JSON token size limit exceeded"); goto out_emit; } - if (g_queue_get_length(parser->tokens) + 1 > MAX_TOKEN_COUNT) { + if (g_queue_get_length(&parser->tokens) + 1 > MAX_TOKEN_COUNT) { error_setg(&err, "JSON token count limit exceeded"); goto out_emit; } @@ -97,21 +91,19 @@ void json_message_process_token(JSONLexer *lexer, GString *input, parser->token_size += input->len; - g_queue_push_tail(parser->tokens, token); + g_queue_push_tail(&parser->tokens, token); if ((parser->brace_count > 0 || parser->bracket_count > 0) && parser->bracket_count >= 0 && parser->bracket_count >= 0) { return; } - json = json_parser_parse(parser->tokens, parser->ap, &err); - parser->tokens = NULL; + json = json_parser_parse(&parser->tokens, parser->ap, &err); out_emit: parser->brace_count = 0; parser->bracket_count = 0; json_message_free_tokens(parser); - parser->tokens = g_queue_new(); parser->token_size = 0; parser->emit(parser->opaque, json, err); } @@ -126,7 +118,7 @@ void json_message_parser_init(JSONMessageParser *parser, parser->ap = ap; parser->brace_count = 0; parser->bracket_count = 0; - parser->tokens = g_queue_new(); + g_queue_init(&parser->tokens); parser->token_size = 0; json_lexer_init(&parser->lexer, !!ap); @@ -141,7 +133,7 @@ void json_message_parser_feed(JSONMessageParser *parser, void json_message_parser_flush(JSONMessageParser *parser) { json_lexer_flush(&parser->lexer); - assert(g_queue_is_empty(parser->tokens)); + assert(g_queue_is_empty(&parser->tokens)); } void json_message_parser_destroy(JSONMessageParser *parser) -- cgit 1.4.1 From abe7c2067c21a89c6fd4cf2d8ba2fa37160d1d55 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:18 +0200 Subject: json: Make JSONToken opaque outside json-parser.c Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-52-armbru@redhat.com> --- include/qapi/qmp/json-parser.h | 4 ++++ include/qapi/qmp/json-streamer.h | 7 ------- qobject/json-parser.c | 19 +++++++++++++++++++ qobject/json-streamer.c | 8 +------- 4 files changed, 24 insertions(+), 14 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h index a34209db7a..21b23d7bec 100644 --- a/include/qapi/qmp/json-parser.h +++ b/include/qapi/qmp/json-parser.h @@ -15,7 +15,11 @@ #define QEMU_JSON_PARSER_H #include "qemu-common.h" +#include "qapi/qmp/json-lexer.h" +typedef struct JSONToken JSONToken; + +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr); QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); #endif diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h index d1d7fe2595..29950ac37c 100644 --- a/include/qapi/qmp/json-streamer.h +++ b/include/qapi/qmp/json-streamer.h @@ -16,13 +16,6 @@ #include "qapi/qmp/json-lexer.h" -typedef struct JSONToken { - int type; - int x; - int y; - char str[]; -} JSONToken; - typedef struct JSONMessageParser { void (*emit)(void *opaque, QObject *json, Error *err); diff --git a/qobject/json-parser.c b/qobject/json-parser.c index e9a9f937f3..a247875f14 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -26,6 +26,13 @@ #include "qapi/qmp/json-lexer.h" #include "qapi/qmp/json-streamer.h" +struct JSONToken { + JSONTokenType type; + int x; + int y; + char str[]; +}; + typedef struct JSONParserContext { Error *err; @@ -538,6 +545,18 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) } } +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr) +{ + JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1); + + token->type = type; + memcpy(token->str, tokstr->str, tokstr->len); + token->str[tokstr->len] = 0; + token->x = x; + token->y = y; + return token; +} + QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) { JSONParserContext ctxt = { .buf = tokens }; diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index 9210281a65..467bc29413 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -82,13 +82,7 @@ void json_message_process_token(JSONLexer *lexer, GString *input, goto out_emit; } - token = g_malloc(sizeof(JSONToken) + input->len + 1); - token->type = type; - memcpy(token->str, input->str, input->len); - token->str[input->len] = 0; - token->x = x; - token->y = y; - + token = json_token(type, x, y, input); parser->token_size += input->len; g_queue_push_tail(&parser->tokens, token); -- cgit 1.4.1 From 86cdf9ec8dec2763702cc52fa412d108a5dc9608 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:20 +0200 Subject: json: Clean up headers The JSON parser has three public headers, json-lexer.h, json-parser.h, json-streamer.h. They all contain stuff that is of no interest outside qobject/json-*.c. Collect the public interface in include/qapi/qmp/json-parser.h, and everything else in qobject/json-parser-int.h. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-54-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 50 ------------------------------------- include/qapi/qmp/json-parser.h | 36 +++++++++++++++++++++------ include/qapi/qmp/json-streamer.h | 46 ---------------------------------- monitor.c | 2 +- qga/main.c | 2 +- qobject/json-lexer.c | 3 +-- qobject/json-parser-int.h | 54 ++++++++++++++++++++++++++++++++++++++++ qobject/json-parser.c | 4 +-- qobject/json-streamer.c | 4 +-- qobject/qjson.c | 2 +- tests/libqtest.c | 2 +- 11 files changed, 90 insertions(+), 115 deletions(-) delete mode 100644 include/qapi/qmp/json-lexer.h delete mode 100644 include/qapi/qmp/json-streamer.h create mode 100644 qobject/json-parser-int.h (limited to 'qobject/json-parser.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h deleted file mode 100644 index 508fc7bdaf..0000000000 --- a/include/qapi/qmp/json-lexer.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * JSON lexer - * - * Copyright IBM, Corp. 2009 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef QEMU_JSON_LEXER_H -#define QEMU_JSON_LEXER_H - - -typedef enum json_token_type { - JSON_MIN = 100, - JSON_LCURLY = JSON_MIN, - JSON_RCURLY, - JSON_LSQUARE, - JSON_RSQUARE, - JSON_COLON, - JSON_COMMA, - JSON_INTEGER, - JSON_FLOAT, - JSON_KEYWORD, - JSON_STRING, - JSON_INTERP, - JSON_SKIP, - JSON_ERROR, - JSON_END_OF_INPUT, -} JSONTokenType; - -typedef struct JSONLexer { - int start_state, state; - GString *token; - int x, y; -} JSONLexer; - -void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); - -void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); - -void json_lexer_flush(JSONLexer *lexer); - -void json_lexer_destroy(JSONLexer *lexer); - -#endif diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h index 55f75954c3..7345a9bd5c 100644 --- a/include/qapi/qmp/json-parser.h +++ b/include/qapi/qmp/json-parser.h @@ -1,5 +1,5 @@ /* - * JSON Parser + * JSON Parser * * Copyright IBM, Corp. 2009 * @@ -11,14 +11,36 @@ * */ -#ifndef QEMU_JSON_PARSER_H -#define QEMU_JSON_PARSER_H +#ifndef QAPI_QMP_JSON_PARSER_H +#define QAPI_QMP_JSON_PARSER_H -#include "qapi/qmp/json-lexer.h" +typedef struct JSONLexer { + int start_state, state; + GString *token; + int x, y; +} JSONLexer; -typedef struct JSONToken JSONToken; +typedef struct JSONMessageParser { + void (*emit)(void *opaque, QObject *json, Error *err); + void *opaque; + va_list *ap; + JSONLexer lexer; + int brace_count; + int bracket_count; + GQueue tokens; + uint64_t token_size; +} JSONMessageParser; -JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr); -QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); +void json_message_parser_init(JSONMessageParser *parser, + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap); + +void json_message_parser_feed(JSONMessageParser *parser, + const char *buffer, size_t size); + +void json_message_parser_flush(JSONMessageParser *parser); + +void json_message_parser_destroy(JSONMessageParser *parser); #endif diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h deleted file mode 100644 index 29950ac37c..0000000000 --- a/include/qapi/qmp/json-streamer.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * JSON streaming support - * - * Copyright IBM, Corp. 2009 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef QEMU_JSON_STREAMER_H -#define QEMU_JSON_STREAMER_H - -#include "qapi/qmp/json-lexer.h" - -typedef struct JSONMessageParser -{ - void (*emit)(void *opaque, QObject *json, Error *err); - void *opaque; - va_list *ap; - JSONLexer lexer; - int brace_count; - int bracket_count; - GQueue tokens; - uint64_t token_size; -} JSONMessageParser; - -void json_message_process_token(JSONLexer *lexer, GString *input, - JSONTokenType type, int x, int y); - -void json_message_parser_init(JSONMessageParser *parser, - void (*emit)(void *opaque, QObject *json, - Error *err), - void *opaque, va_list *ap); - -void json_message_parser_feed(JSONMessageParser *parser, - const char *buffer, size_t size); - -void json_message_parser_flush(JSONMessageParser *parser); - -void json_message_parser_destroy(JSONMessageParser *parser); - -#endif diff --git a/monitor.c b/monitor.c index 3dbdcb5190..021c11b1bf 100644 --- a/monitor.c +++ b/monitor.c @@ -58,7 +58,7 @@ #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" #include "qapi/qmp/qjson.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qlist.h" #include "qom/object_interfaces.h" #include "trace-root.h" diff --git a/qga/main.c b/qga/main.c index b74e1241ef..6d70242d05 100644 --- a/qga/main.c +++ b/qga/main.c @@ -18,7 +18,7 @@ #include #include #endif -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qstring.h" diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 06ec67dc45..e1745a3d95 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -12,8 +12,7 @@ */ #include "qemu/osdep.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) diff --git a/qobject/json-parser-int.h b/qobject/json-parser-int.h new file mode 100644 index 0000000000..ceaa890ec6 --- /dev/null +++ b/qobject/json-parser-int.h @@ -0,0 +1,54 @@ +/* + * JSON Parser + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef JSON_PARSER_INT_H +#define JSON_PARSER_INT_H + +#include "qapi/qmp/json-parser.h" + + +typedef enum json_token_type { + JSON_MIN = 100, + JSON_LCURLY = JSON_MIN, + JSON_RCURLY, + JSON_LSQUARE, + JSON_RSQUARE, + JSON_COLON, + JSON_COMMA, + JSON_INTEGER, + JSON_FLOAT, + JSON_KEYWORD, + JSON_STRING, + JSON_INTERP, + JSON_SKIP, + JSON_ERROR, + JSON_END_OF_INPUT, +} JSONTokenType; + +typedef struct JSONToken JSONToken; + +/* json-lexer.c */ +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); +void json_lexer_flush(JSONLexer *lexer); +void json_lexer_destroy(JSONLexer *lexer); + +/* json-streamer.c */ +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y); + +/* json-parser.c */ +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr); +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); + +#endif diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a247875f14..7449684f1c 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -22,9 +22,7 @@ #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" -#include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" struct JSONToken { JSONTokenType type; diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index da53e770e9..47dd7ea576 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -13,9 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) #define MAX_TOKEN_COUNT (2ULL << 20) diff --git a/qobject/qjson.c b/qobject/qjson.c index b9ccae2c2a..db36101f3b 100644 --- a/qobject/qjson.c +++ b/qobject/qjson.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qdict.h" diff --git a/tests/libqtest.c b/tests/libqtest.c index 5973a67652..d635c5bea0 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -24,7 +24,7 @@ #include "qemu-common.h" #include "qemu/cutils.h" #include "qapi/error.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qlist.h" -- cgit 1.4.1 From ada74c3ba1b4f51e4462e186c251eaa974015bb8 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:22 +0200 Subject: json: Keep interpolation state in JSONParserContext The recursive descent parser passes along a pointer to JSONParserContext. It additionally passes a pointer to interpolation state (a va_alist *) as needed to reach its consumer parse_interpolation(). Stuffing the latter pointer into JSONParserContext saves us the trouble of passing it along, so do that. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-56-armbru@redhat.com> --- qobject/json-parser.c | 59 ++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 7449684f1c..273f448a52 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -36,6 +36,7 @@ typedef struct JSONParserContext Error *err; JSONToken *current; GQueue *buf; + va_list *ap; } JSONParserContext; #define BUG_ON(cond) assert(!(cond)) @@ -49,7 +50,7 @@ typedef struct JSONParserContext * 4) deal with premature EOI */ -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); +static QObject *parse_value(JSONParserContext *ctxt); /** * Error handler @@ -243,7 +244,7 @@ static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) /** * Parsing rules */ -static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) +static int parse_pair(JSONParserContext *ctxt, QDict *dict) { QObject *value; QString *key = NULL; @@ -255,7 +256,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - key = qobject_to(QString, parse_value(ctxt, ap)); + key = qobject_to(QString, parse_value(ctxt)); if (!key) { parse_error(ctxt, peek, "key is not a string in object"); goto out; @@ -272,7 +273,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - value = parse_value(ctxt, ap); + value = parse_value(ctxt); if (value == NULL) { parse_error(ctxt, token, "Missing value in dict"); goto out; @@ -290,7 +291,7 @@ out: return -1; } -static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_object(JSONParserContext *ctxt) { QDict *dict = NULL; JSONToken *token, *peek; @@ -307,7 +308,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) } if (peek->type != JSON_RCURLY) { - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -323,7 +324,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) goto out; } - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -344,7 +345,7 @@ out: return NULL; } -static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_array(JSONParserContext *ctxt) { QList *list = NULL; JSONToken *token, *peek; @@ -363,7 +364,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) if (peek->type != JSON_RSQUARE) { QObject *obj; - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -383,7 +384,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) goto out; } - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -426,7 +427,7 @@ static QObject *parse_keyword(JSONParserContext *ctxt) return NULL; } -static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_interpolation(JSONParserContext *ctxt) { JSONToken *token; @@ -434,29 +435,29 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) assert(token && token->type == JSON_INTERP); if (!strcmp(token->str, "%p")) { - return va_arg(*ap, QObject *); + return va_arg(*ctxt->ap, QObject *); } else if (!strcmp(token->str, "%i")) { - return QOBJECT(qbool_from_bool(va_arg(*ap, int))); + return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%d")) { - return QOBJECT(qnum_from_int(va_arg(*ap, int))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%ld")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long))); } else if (!strcmp(token->str, "%lld")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long long))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long))); } else if (!strcmp(token->str, "%" PRId64)) { - return QOBJECT(qnum_from_int(va_arg(*ap, int64_t))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t))); } else if (!strcmp(token->str, "%u")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int))); } else if (!strcmp(token->str, "%lu")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long))); } else if (!strcmp(token->str, "%llu")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long))); } else if (!strcmp(token->str, "%" PRIu64)) { - return QOBJECT(qnum_from_uint(va_arg(*ap, uint64_t))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t))); } else if (!strcmp(token->str, "%s")) { - return QOBJECT(qstring_from_str(va_arg(*ap, const char *))); + return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *))); } else if (!strcmp(token->str, "%f")) { - return QOBJECT(qnum_from_double(va_arg(*ap, double))); + return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double))); } parse_error(ctxt, token, "invalid interpolation '%s'", token->str); return NULL; @@ -514,7 +515,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } } -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_value(JSONParserContext *ctxt) { JSONToken *token; @@ -526,11 +527,11 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) switch (token->type) { case JSON_LCURLY: - return parse_object(ctxt, ap); + return parse_object(ctxt); case JSON_LSQUARE: - return parse_array(ctxt, ap); + return parse_array(ctxt); case JSON_INTERP: - return parse_interpolation(ctxt, ap); + return parse_interpolation(ctxt); case JSON_INTEGER: case JSON_FLOAT: case JSON_STRING: @@ -557,10 +558,10 @@ JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr) QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) { - JSONParserContext ctxt = { .buf = tokens }; + JSONParserContext ctxt = { .buf = tokens, .ap = ap }; QObject *result; - result = parse_value(&ctxt, ap); + result = parse_value(&ctxt); assert(ctxt.err || g_queue_is_empty(ctxt.buf)); error_propagate(errp, ctxt.err); -- cgit 1.4.1 From 16a485992112be1c8b47b58b0124357db9037093 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:23 +0200 Subject: json: Improve safety of qobject_from_jsonf_nofail() & friends The JSON parser optionally supports interpolation. This is used to build QObjects by parsing string templates. The templates are C literals, so parse errors (such as invalid interpolation specifications) are actually programming errors. Consequently, the functions providing parsing with interpolation (qobject_from_jsonf_nofail(), qobject_from_vjsonf_nofail(), qdict_from_jsonf_nofail(), qdict_from_vjsonf_nofail()) pass &error_abort to the parser. However, there's another, more dangerous kind of programming error: since we use va_arg() to get the value to interpolate, behavior is undefined when the variable argument isn't consistent with the interpolation specification. The same problem exists with printf()-like functions, and the solution is to have the compiler check consistency. This is what GCC_FMT_ATTR() is about. To enable this type checking for interpolation as well, we carefully chose our interpolation specifications to match printf conversion specifications, and decorate functions parsing templates with GCC_FMT_ATTR(). Note that this only protects against undefined behavior due to type errors. It can't protect against use of invalid interpolation specifications that happen to be valid printf conversion specifications. However, there's still a gaping hole in the type checking: GCC recognizes '%' as start of printf conversion specification anywhere in the template, but the parser recognizes it only outside JSON strings. For instance, if someone were to pass a "{ '%s': %d }" template, GCC would require a char * and an int argument, but the parser would va_arg() only an int argument, resulting in undefined behavior. Avoid undefined behavior by catching the programming error at run time: have the parser recognize and reject '%' in JSON strings. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-57-armbru@redhat.com> --- qobject/json-parser.c | 12 ++++++++++-- tests/check-qjson.c | 17 +++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 273f448a52..63e9229f1c 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -144,7 +144,8 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) while (*ptr != quote) { assert(*ptr); - if (*ptr == '\\') { + switch (*ptr) { + case '\\': beg = ptr++; switch (*ptr++) { case '"': @@ -205,7 +206,14 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) parse_error(ctxt, token, "invalid escape sequence in string"); goto out; } - } else { + break; + case '%': + if (ctxt->ap) { + parse_error(ctxt, token, "can't interpolate into string"); + goto out; + } + /* fall through */ + default: cp = mod_utf8_codepoint(ptr, 6, &end); if (cp < 0) { parse_error(ctxt, token, "invalid UTF-8 sequence in string"); diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 936258ddd4..a1854573de 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1037,16 +1037,13 @@ static void interpolation_unknown(void) static void interpolation_string(void) { - QLitObject decoded = QLIT_QLIST(((QLitObject[]){ - QLIT_QSTR("%s"), - QLIT_QSTR("eins"), - {}})); - QObject *qobj; - - /* Dangerous misfeature: % is silently ignored in strings */ - qobj = qobject_from_jsonf_nofail("['%s', %s]", "eins", "zwei"); - g_assert(qlit_equal_qobject(&decoded, qobj)); - qobject_unref(qobj); + if (g_test_subprocess()) { + qobject_from_jsonf_nofail("['%s', %s]", "eins", "zwei"); + } + g_test_trap_subprocess(NULL, 0, 0); + g_test_trap_assert_failed(); + g_test_trap_assert_stderr("*Unexpected error*" + "can't interpolate into string*"); } static void simple_dict(void) -- cgit 1.4.1 From 8bca4613e6cddd948895b8db3def05950463495b Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:24 +0200 Subject: json: Support %% in JSON strings when interpolating The previous commit makes JSON strings containing '%' awkward to express in templates: you'd have to mask the '%' with an Unicode escape \u0025. No template currently contains such JSON strings. Support the printf conversion specification %% in JSON strings as a convenience anyway, because it's trivially easy to do. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-58-armbru@redhat.com> --- qobject/json-parser.c | 3 ++- tests/check-qjson.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 63e9229f1c..3318b8dad0 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -208,10 +208,11 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) } break; case '%': - if (ctxt->ap) { + if (ctxt->ap && ptr[1] != '%') { parse_error(ctxt, token, "can't interpolate into string"); goto out; } + ptr++; /* fall through */ default: cp = mod_utf8_codepoint(ptr, 6, &end); diff --git a/tests/check-qjson.c b/tests/check-qjson.c index a1854573de..cc13f3d41e 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1270,7 +1270,7 @@ static void simple_interpolation(void) QObject *obj; QLitObject decoded = QLIT_QLIST(((QLitObject[]){ QLIT_QNUM(1), - QLIT_QNUM(2), + QLIT_QSTR("100%"), QLIT_QLIST(((QLitObject[]){ QLIT_QNUM(32), QLIT_QNUM(42), @@ -1280,7 +1280,7 @@ static void simple_interpolation(void) embedded_obj = qobject_from_json("[32, 42]", &error_abort); g_assert(embedded_obj != NULL); - obj = qobject_from_jsonf_nofail("[%d, 2, %p]", 1, embedded_obj); + obj = qobject_from_jsonf_nofail("[%d, '100%%', %p]", 1, embedded_obj); g_assert(qlit_equal_qobject(&decoded, obj)); qobject_unref(obj); -- cgit 1.4.1 From 37aded92c27d0e56cd27f1c29494fc9f8c873cdd Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:25 +0200 Subject: json: Update references to RFC 7159 to RFC 8259 RFC 8259 (December 2017) obsoletes RFC 7159 (March 2014). Signed-off-by: Markus Armbruster Message-Id: <20180823164025.12553-59-armbru@redhat.com> Reviewed-by: Eric Blake --- include/qapi/qmp/qnum.h | 2 +- qapi/introspect.json | 2 +- qobject/json-parser.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'qobject/json-parser.c') diff --git a/include/qapi/qmp/qnum.h b/include/qapi/qmp/qnum.h index 45bf02a036..bbae0a5ec8 100644 --- a/include/qapi/qmp/qnum.h +++ b/include/qapi/qmp/qnum.h @@ -25,7 +25,7 @@ typedef enum { /* * QNum encapsulates how our dialect of JSON fills in the blanks left - * by the JSON specification (RFC 7159) regarding numbers. + * by the JSON specification (RFC 8259) regarding numbers. * * Conceptually, we treat number as an abstract type with three * concrete subtypes: floating-point, signed integer, unsigned diff --git a/qapi/introspect.json b/qapi/introspect.json index 137b39b992..3d22166b2b 100644 --- a/qapi/introspect.json +++ b/qapi/introspect.json @@ -120,7 +120,7 @@ ## # @JSONType: # -# The four primitive and two structured types according to RFC 7159 +# The four primitive and two structured types according to RFC 8259 # section 1, plus 'int' (split off 'number'), plus the obvious top # type 'value'. # diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 3318b8dad0..5a840dfd86 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -516,7 +516,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } case JSON_FLOAT: /* FIXME dependent on locale; a pervasive issue in QEMU */ - /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN, + /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN, * but those might be useful extensions beyond JSON */ return QOBJECT(qnum_from_double(strtod(token->str, NULL))); default: -- cgit 1.4.1