From a2ec6be72b80770b063cf08c95c78f0d36705355 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:44 +0200 Subject: json: Fix lexer to include the bad character in JSON_ERROR token json_lexer[] maps (lexer state, input character) to the new lexer state. The input character is consumed unless the new state is terminal and the input character doesn't belong to this token, i.e. the state transition uses look-ahead. When this is the case, input character '\0' would result in the same state transition. TERMINAL_NEEDED_LOOKAHEAD() exploits this. Except this is wrong for transitions to IN_ERROR. There, the offending input character is in fact consumed: case IN_ERROR returns. It isn't added to the JSON_ERROR token, though. Fix that by making TERMINAL_NEEDED_LOOKAHEAD() return false for transitions to IN_ERROR. There's a slight complication. json_lexer_flush() passes input character '\0' to flush an incomplete token. If this results in JSON_ERROR, we'd now add the '\0' to the token. Suppress that. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-18-armbru@redhat.com> --- qobject/json-lexer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 980ba159d6..7c0875d225 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -76,7 +76,7 @@ QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); from OLD_STATE required lookahead. This happens whenever the table below uses the TERMINAL macro. */ #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \ - (json_lexer[(old_state)][0] == (terminal)) + (terminal != IN_ERROR && json_lexer[(old_state)][0] == (terminal)) static const uint8_t json_lexer[][256] = { /* Relies on default initialization to IN_ERROR! */ @@ -304,7 +304,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) assert(lexer->state <= ARRAY_SIZE(json_lexer)); new_state = json_lexer[lexer->state][(uint8_t)ch]; char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state); - if (char_consumed) { + if (char_consumed && !flush) { g_string_append_c(lexer->token, ch); } -- cgit 1.4.1 From 340db1ed82f8ced40a3e778c08963005369e2926 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:45 +0200 Subject: json: Reject unescaped control characters Fix the lexer to reject unescaped control characters in JSON strings, in accordance with RFC 8259 "The JavaScript Object Notation (JSON) Data Interchange Format". Bonus: we now recover more nicely from unclosed strings. E.g. {"one: 1}\n{"two": 2} now recovers cleanly after the newline, where before the lexer remained confused until the next unpaired double quote or lexical error. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-19-armbru@redhat.com> --- qobject/json-lexer.c | 4 ++-- tests/check-qjson.c | 6 +----- tests/qmp-test.c | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 7c0875d225..e85e9a78ff 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -115,7 +115,7 @@ static const uint8_t json_lexer[][256] = { ['u'] = IN_DQ_UCODE0, }, [IN_DQ_STRING] = { - [1 ... 0xBF] = IN_DQ_STRING, + [0x20 ... 0xBF] = IN_DQ_STRING, [0xC2 ... 0xF4] = IN_DQ_STRING, ['\\'] = IN_DQ_STRING_ESCAPE, ['"'] = JSON_STRING, @@ -155,7 +155,7 @@ static const uint8_t json_lexer[][256] = { ['u'] = IN_SQ_UCODE0, }, [IN_SQ_STRING] = { - [1 ... 0xBF] = IN_SQ_STRING, + [0x20 ... 0xBF] = IN_SQ_STRING, [0xC2 ... 0xF4] = IN_SQ_STRING, ['\\'] = IN_SQ_STRING_ESCAPE, ['\''] = JSON_STRING, diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 1688b2f5c1..f1405ad47a 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -202,11 +202,7 @@ static void utf8_string(void) "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" "\x10\x11\x12\x13\x14\x15\x16\x17" "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", - /* bug: not corrected (valid UTF-8, but invalid JSON) */ - "\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", + NULL, "\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007" "\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F" "\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017" diff --git a/tests/qmp-test.c b/tests/qmp-test.c index 5edc97f63f..7b3ba17c4a 100644 --- a/tests/qmp-test.c +++ b/tests/qmp-test.c @@ -86,9 +86,9 @@ static void test_malformed(QTestState *qts) g_assert(recovered(qts)); /* lexical error: control character in string */ - qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n'}"); + qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n"); resp = qtest_qmp_receive(qts); - g_assert_cmpstr(get_error_class(resp), ==, "CommandNotFound"); /* BUG */ + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); qobject_unref(resp); g_assert(recovered(qts)); -- cgit 1.4.1 From eddc0a7f0ad84edd0f8dd27d4a70a305ccd7bc5f Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:46 +0200 Subject: json: Revamp lexer documentation Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-20-armbru@redhat.com> --- qobject/json-lexer.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 9 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index e85e9a78ff..902fe60846 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -18,21 +18,83 @@ #define MAX_TOKEN_SIZE (64ULL << 20) /* - * Required by JSON (RFC 7159): + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format", with [comments in brackets]: * - * \"([^\\\"]|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*\" - * -?(0|[1-9][0-9]*)(.[0-9]+)?([eE][-+]?[0-9]+)? - * [{}\[\],:] - * [a-z]+ # covers null, true, false + * The set of tokens includes six structural characters, strings, + * numbers, and three literal names. * - * Extension of '' strings: + * These are the six structural characters: * - * '([^\\']|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*' + * begin-array = ws %x5B ws ; [ left square bracket + * begin-object = ws %x7B ws ; { left curly bracket + * end-array = ws %x5D ws ; ] right square bracket + * end-object = ws %x7D ws ; } right curly bracket + * name-separator = ws %x3A ws ; : colon + * value-separator = ws %x2C ws ; , comma * - * Extension for vararg handling in JSON construction: + * Insignificant whitespace is allowed before or after any of the six + * structural characters. + * [This lexer accepts it before or after any token, which is actually + * the same, as the grammar always has structural characters between + * other tokens.] * - * %((l|ll|I64)?d|[ipsf]) + * ws = *( + * %x20 / ; Space + * %x09 / ; Horizontal tab + * %x0A / ; Line feed or New line + * %x0D ) ; Carriage return * + * [...] three literal names: + * false null true + * [This lexer accepts [a-z]+, and leaves rejecting unknown literal + * names to the parser.] + * + * [Numbers:] + * + * number = [ minus ] int [ frac ] [ exp ] + * decimal-point = %x2E ; . + * digit1-9 = %x31-39 ; 1-9 + * e = %x65 / %x45 ; e E + * exp = e [ minus / plus ] 1*DIGIT + * frac = decimal-point 1*DIGIT + * int = zero / ( digit1-9 *DIGIT ) + * minus = %x2D ; - + * plus = %x2B ; + + * zero = %x30 ; 0 + * + * [Strings:] + * string = quotation-mark *char quotation-mark + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. + * - Interpolation: + * interpolation = %((l|ll|I64)[du]|[ipsf]) + * + * Note: + * - Input must be encoded in UTF-8. + * - Decoding and validating is left to the parser. */ enum json_lexer_state { -- cgit 1.4.1 From de930f45cb56ccf7535cbacee3f3686d416f5283 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:51 +0200 Subject: json: Leave rejecting invalid UTF-8 to parser Both the lexer and the parser (attempt to) validate UTF-8 in JSON strings. The lexer rejects bytes that can't occur in valid UTF-8: \xC0..\xC1, \xF5..\xFF. This rejects some, but not all invalid UTF-8. It also rejects ASCII control characters \x00..\x1F, in accordance with RFC 8259 (see recent commit "json: Reject unescaped control characters"). When the lexer rejects, it ends the token right after the first bad byte. Good when the bad byte is a newline. Not so good when it's something like an overlong sequence in the middle of a string. For instance, input {"abc\xC0\xAFijk": 1}\n produces the tokens JSON_LCURLY { JSON_ERROR "abc\xC0 JSON_ERROR \xAF JSON_KEYWORD ijk JSON_ERROR ": 1}\n The parser then reports four errors Invalid JSON syntax Invalid JSON syntax JSON parse error, invalid keyword 'ijk' Invalid JSON syntax before it recovers at the newline. The commit before previous made the parser reject invalid UTF-8 sequences. Since then, anything the lexer rejects, the parser would reject as well. Thus, the lexer's rejecting is unnecessary for correctness, and harmful for error reporting. However, we want to keep rejecting ASCII control characters in the lexer, because that produces the behavior we want for unclosed strings. We also need to keep rejecting \xFF in the lexer, because we documented that as a way to reset the JSON parser (docs/interop/qmp-spec.txt section 2.6 QGA Synchronization), which means we can't change how we recover from this error now. I wish we hadn't done that. I think we should treat \xFE the same as \xFF. Change the lexer to accept \xC0..\xC1 and \xF5..\xFD. It now rejects only \x00..\x1F and \xFE..\xFF. Error reporting for invalid UTF-8 in strings is much improved, except for \xFE and \xFF. For the example above, the lexer now produces JSON_LCURLY { JSON_STRING "abc\xC0\xAFijk" JSON_COLON : JSON_INTEGER 1 JSON_RCURLY and the parser reports just JSON parse error, invalid UTF-8 sequence in string Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-25-armbru@redhat.com> --- qobject/json-lexer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 902fe60846..93fa2737e6 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -177,8 +177,7 @@ static const uint8_t json_lexer[][256] = { ['u'] = IN_DQ_UCODE0, }, [IN_DQ_STRING] = { - [0x20 ... 0xBF] = IN_DQ_STRING, - [0xC2 ... 0xF4] = IN_DQ_STRING, + [0x20 ... 0xFD] = IN_DQ_STRING, ['\\'] = IN_DQ_STRING_ESCAPE, ['"'] = JSON_STRING, }, @@ -217,8 +216,7 @@ static const uint8_t json_lexer[][256] = { ['u'] = IN_SQ_UCODE0, }, [IN_SQ_STRING] = { - [0x20 ... 0xBF] = IN_SQ_STRING, - [0xC2 ... 0xF4] = IN_SQ_STRING, + [0x20 ... 0xFD] = IN_SQ_STRING, ['\\'] = IN_SQ_STRING_ESCAPE, ['\''] = JSON_STRING, }, -- cgit 1.4.1 From 4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:52 +0200 Subject: json: Accept overlong \xC0\x80 as U+0000 ("modified UTF-8") Since the JSON grammer doesn't accept U+0000 anywhere, this merely exchanges one kind of parse error for another. It's purely for consistency with qobject_to_json(), which accepts \xC0\x80 (see commit e2ec3f97680). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-26-armbru@redhat.com> --- qobject/json-lexer.c | 2 +- qobject/json-parser.c | 2 +- tests/check-qjson.c | 8 +------- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 93fa2737e6..4c402f62d3 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -93,7 +93,7 @@ * interpolation = %((l|ll|I64)[du]|[ipsf]) * * Note: - * - Input must be encoded in UTF-8. + * - Input must be encoded in modified UTF-8. * - Decoding and validating is left to the parser. */ diff --git a/qobject/json-parser.c b/qobject/json-parser.c index b77931614b..a9b227f56c 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -200,7 +200,7 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, } } else { cp = mod_utf8_codepoint(ptr, 6, &end); - if (cp <= 0) { + if (cp < 0) { parse_error(ctxt, token, "invalid UTF-8 sequence in string"); goto out; } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 71c77d2f70..3abf12b4d2 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -152,12 +152,6 @@ static void string_with_quotes(void) static void utf8_string(void) { /* - * Problem: we can't easily deal with embedded U+0000. Parsing - * the JSON string "this \\u0000" is fun" yields "this \0 is fun", - * which gets misinterpreted as NUL-terminated "this ". We should - * consider using overlong encoding \xC0\x80 for U+0000 ("modified - * UTF-8"). - * * Most test cases are scraped from Markus Kuhn's UTF-8 decoder * capability and stress test at * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt @@ -586,7 +580,7 @@ static void utf8_string(void) { /* \U+0000 */ "\xC0\x80", - NULL, + "\xC0\x80", "\\u0000", }, { -- cgit 1.4.1 From b2da4a4d7537567b44db60b7b79cd14f64e48f2f Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:39:53 +0200 Subject: json: Leave rejecting invalid escape sequences to parser Both lexer and parser reject invalid escape sequences in strings. The parser's check is useless. The lexer ends the token right after the first non-well-formed byte. This tends to lead to suboptimal error reporting. For instance, input {"abc\@ijk": 1} produces the tokens JSON_LCURLY { JSON_ERROR "abc\@ JSON_KEYWORD ijk JSON_ERROR ": 1}\n The parser then reports three errors Invalid JSON syntax JSON parse error, invalid keyword 'ijk' Invalid JSON syntax before it recovers at the newline. Drop the lexer's escape sequence checking, and make it accept the same characters after backslash it accepts elsewhere in strings. It now produces JSON_LCURLY { JSON_STRING "abc\@ijk" JSON_COLON : JSON_INTEGER 1 JSON_RCURLY and the parser reports just JSON parse error, invalid escape sequence in string While there, fix parse_string()'s inaccurate function comment. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-27-armbru@redhat.com> --- qobject/json-lexer.c | 72 +++------------------------------------------------ qobject/json-parser.c | 56 +++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 91 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 4c402f62d3..0731779470 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -80,6 +80,8 @@ * escape = %x5C ; \ * quotation-mark = %x22 ; " * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * [This lexer accepts any non-control character after escape, and + * leaves rejecting invalid ones to the parser.] * * * Extensions over RFC 8259: @@ -99,16 +101,8 @@ enum json_lexer_state { IN_ERROR = 0, /* must really be 0, see json_lexer[] */ - IN_DQ_UCODE3, - IN_DQ_UCODE2, - IN_DQ_UCODE1, - IN_DQ_UCODE0, IN_DQ_STRING_ESCAPE, IN_DQ_STRING, - IN_SQ_UCODE3, - IN_SQ_UCODE2, - IN_SQ_UCODE1, - IN_SQ_UCODE0, IN_SQ_STRING_ESCAPE, IN_SQ_STRING, IN_ZERO, @@ -144,37 +138,8 @@ static const uint8_t json_lexer[][256] = { /* Relies on default initialization to IN_ERROR! */ /* double quote string */ - [IN_DQ_UCODE3] = { - ['0' ... '9'] = IN_DQ_STRING, - ['a' ... 'f'] = IN_DQ_STRING, - ['A' ... 'F'] = IN_DQ_STRING, - }, - [IN_DQ_UCODE2] = { - ['0' ... '9'] = IN_DQ_UCODE3, - ['a' ... 'f'] = IN_DQ_UCODE3, - ['A' ... 'F'] = IN_DQ_UCODE3, - }, - [IN_DQ_UCODE1] = { - ['0' ... '9'] = IN_DQ_UCODE2, - ['a' ... 'f'] = IN_DQ_UCODE2, - ['A' ... 'F'] = IN_DQ_UCODE2, - }, - [IN_DQ_UCODE0] = { - ['0' ... '9'] = IN_DQ_UCODE1, - ['a' ... 'f'] = IN_DQ_UCODE1, - ['A' ... 'F'] = IN_DQ_UCODE1, - }, [IN_DQ_STRING_ESCAPE] = { - ['b'] = IN_DQ_STRING, - ['f'] = IN_DQ_STRING, - ['n'] = IN_DQ_STRING, - ['r'] = IN_DQ_STRING, - ['t'] = IN_DQ_STRING, - ['/'] = IN_DQ_STRING, - ['\\'] = IN_DQ_STRING, - ['\''] = IN_DQ_STRING, - ['\"'] = IN_DQ_STRING, - ['u'] = IN_DQ_UCODE0, + [0x20 ... 0xFD] = IN_DQ_STRING, }, [IN_DQ_STRING] = { [0x20 ... 0xFD] = IN_DQ_STRING, @@ -183,37 +148,8 @@ static const uint8_t json_lexer[][256] = { }, /* single quote string */ - [IN_SQ_UCODE3] = { - ['0' ... '9'] = IN_SQ_STRING, - ['a' ... 'f'] = IN_SQ_STRING, - ['A' ... 'F'] = IN_SQ_STRING, - }, - [IN_SQ_UCODE2] = { - ['0' ... '9'] = IN_SQ_UCODE3, - ['a' ... 'f'] = IN_SQ_UCODE3, - ['A' ... 'F'] = IN_SQ_UCODE3, - }, - [IN_SQ_UCODE1] = { - ['0' ... '9'] = IN_SQ_UCODE2, - ['a' ... 'f'] = IN_SQ_UCODE2, - ['A' ... 'F'] = IN_SQ_UCODE2, - }, - [IN_SQ_UCODE0] = { - ['0' ... '9'] = IN_SQ_UCODE1, - ['a' ... 'f'] = IN_SQ_UCODE1, - ['A' ... 'F'] = IN_SQ_UCODE1, - }, [IN_SQ_STRING_ESCAPE] = { - ['b'] = IN_SQ_STRING, - ['f'] = IN_SQ_STRING, - ['n'] = IN_SQ_STRING, - ['r'] = IN_SQ_STRING, - ['t'] = IN_SQ_STRING, - ['/'] = IN_SQ_STRING, - ['\\'] = IN_SQ_STRING, - ['\''] = IN_SQ_STRING, - ['\"'] = IN_SQ_STRING, - ['u'] = IN_SQ_UCODE0, + [0x20 ... 0xFD] = IN_SQ_STRING, }, [IN_SQ_STRING] = { [0x20 ... 0xFD] = IN_SQ_STRING, diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a9b227f56c..7437827c24 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -106,30 +106,40 @@ static int hex2decimal(char ch) } /** - * parse_string(): Parse a json string and return a QObject + * parse_string(): Parse a JSON string * - * string - * "" - * " chars " - * chars - * char - * char chars - * char - * any-Unicode-character- - * except-"-or-\-or- - * control-character - * \" - * \\ - * \/ - * \b - * \f - * \n - * \r - * \t - * \u four-hex-digits + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format": + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. + * + * Note: + * - Encoding is modified UTF-8. + * - Invalid Unicode characters are rejected. + * - Control characters \x00..\x1F are rejected by the lexer. */ -static QString *qstring_from_escaped_str(JSONParserContext *ctxt, - JSONToken *token) +static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) { const char *ptr = token->str; QString *str; @@ -495,7 +505,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) switch (token->type) { case JSON_STRING: - return QOBJECT(qstring_from_escaped_str(ctxt, token)); + return QOBJECT(parse_string(ctxt, token)); case JSON_INTEGER: { /* * Represent JSON_INTEGER as QNUM_I64 if possible, else as -- cgit 1.4.1 From 7c1e1d5481fe8d2e757469179f9ccd14e8838ed1 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Thu, 23 Aug 2018 18:39:58 +0200 Subject: json: remove useless return value from lexer/parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lexer always returns 0 when char feeding. Furthermore, none of the caller care about the return value. Signed-off-by: Marc-André Lureau Message-Id: <20180326150916.9602-10-marcandre.lureau@redhat.com> Reviewed-by: Markus Armbruster Reviewed-by: Thomas Huth Signed-off-by: Markus Armbruster Message-Id: <20180823164025.12553-32-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 4 ++-- include/qapi/qmp/json-streamer.h | 4 ++-- qobject/json-lexer.c | 23 ++++++++--------------- qobject/json-streamer.c | 8 ++++---- 4 files changed, 16 insertions(+), 23 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index afee7828cd..66ccf0357c 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -47,9 +47,9 @@ struct JSONLexer void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func); -int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); -int json_lexer_flush(JSONLexer *lexer); +void json_lexer_flush(JSONLexer *lexer); void json_lexer_destroy(JSONLexer *lexer); diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h index 00d8a23af8..cb808cf27d 100644 --- a/include/qapi/qmp/json-streamer.h +++ b/include/qapi/qmp/json-streamer.h @@ -36,10 +36,10 @@ typedef struct JSONMessageParser void json_message_parser_init(JSONMessageParser *parser, void (*func)(JSONMessageParser *, GQueue *)); -int json_message_parser_feed(JSONMessageParser *parser, +void json_message_parser_feed(JSONMessageParser *parser, const char *buffer, size_t size); -int json_message_parser_flush(JSONMessageParser *parser); +void json_message_parser_flush(JSONMessageParser *parser); void json_message_parser_destroy(JSONMessageParser *parser); diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 0731779470..d9701f857b 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -286,7 +286,7 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) lexer->x = lexer->y = 0; } -static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) +static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) { int char_consumed, new_state; @@ -340,7 +340,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) g_string_truncate(lexer->token, 0); new_state = IN_START; lexer->state = new_state; - return 0; + return; default: break; } @@ -355,29 +355,22 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) g_string_truncate(lexer->token, 0); lexer->state = IN_START; } - - return 0; } -int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) { size_t i; for (i = 0; i < size; i++) { - int err; - - err = json_lexer_feed_char(lexer, buffer[i], false); - if (err < 0) { - return err; - } + json_lexer_feed_char(lexer, buffer[i], false); } - - return 0; } -int json_lexer_flush(JSONLexer *lexer) +void json_lexer_flush(JSONLexer *lexer) { - return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true); + if (lexer->state != IN_START) { + json_lexer_feed_char(lexer, 0, true); + } } void json_lexer_destroy(JSONLexer *lexer) diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index c51c2021f9..78dfff2aa0 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -118,15 +118,15 @@ void json_message_parser_init(JSONMessageParser *parser, json_lexer_init(&parser->lexer, json_message_process_token); } -int json_message_parser_feed(JSONMessageParser *parser, +void json_message_parser_feed(JSONMessageParser *parser, const char *buffer, size_t size) { - return json_lexer_feed(&parser->lexer, buffer, size); + json_lexer_feed(&parser->lexer, buffer, size); } -int json_message_parser_flush(JSONMessageParser *parser) +void json_message_parser_flush(JSONMessageParser *parser) { - return json_lexer_flush(&parser->lexer); + json_lexer_flush(&parser->lexer); } void json_message_parser_destroy(JSONMessageParser *parser) -- cgit 1.4.1 From 037f2440888a22bd00ea0d8e37a1b7ed7d2bba88 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:00 +0200 Subject: json: Have lexer call streamer directly json_lexer_init() takes the function to process a token as an argument. It's always json_message_process_token(). Makes the code harder to understand for no actual gain. Drop the indirection. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-34-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 13 +++---------- include/qapi/qmp/json-streamer.h | 3 +++ qobject/json-lexer.c | 13 ++++++++----- qobject/json-streamer.c | 6 +++--- 4 files changed, 17 insertions(+), 18 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index 66ccf0357c..44bcf2ca64 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -32,20 +32,13 @@ typedef enum json_token_type { JSON_ERROR, } JSONTokenType; -typedef struct JSONLexer JSONLexer; - -typedef void (JSONLexerEmitter)(JSONLexer *, GString *, - JSONTokenType, int x, int y); - -struct JSONLexer -{ - JSONLexerEmitter *emit; +typedef struct JSONLexer { int state; GString *token; int x, y; -}; +} JSONLexer; -void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func); +void json_lexer_init(JSONLexer *lexer); void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h index cb808cf27d..7922e185a5 100644 --- a/include/qapi/qmp/json-streamer.h +++ b/include/qapi/qmp/json-streamer.h @@ -33,6 +33,9 @@ typedef struct JSONMessageParser uint64_t token_size; } JSONMessageParser; +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y); + void json_message_parser_init(JSONMessageParser *parser, void (*func)(JSONMessageParser *, GQueue *)); diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index d9701f857b..17272a3874 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qapi/qmp/json-lexer.h" +#include "qapi/qmp/json-streamer.h" #define MAX_TOKEN_SIZE (64ULL << 20) @@ -278,9 +279,8 @@ static const uint8_t json_lexer[][256] = { }, }; -void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) +void json_lexer_init(JSONLexer *lexer) { - lexer->emit = func; lexer->state = IN_START; lexer->token = g_string_sized_new(3); lexer->x = lexer->y = 0; @@ -316,7 +316,8 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) case JSON_FLOAT: case JSON_KEYWORD: case JSON_STRING: - lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, new_state, + lexer->x, lexer->y); /* fall through */ case JSON_SKIP: g_string_truncate(lexer->token, 0); @@ -336,7 +337,8 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) * never a valid ASCII/UTF-8 sequence, so this should reliably * induce an error/flush state. */ - lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, JSON_ERROR, + lexer->x, lexer->y); g_string_truncate(lexer->token, 0); new_state = IN_START; lexer->state = new_state; @@ -351,7 +353,8 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) * this is a security consideration. */ if (lexer->token->len > MAX_TOKEN_SIZE) { - lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, lexer->state, + lexer->x, lexer->y); g_string_truncate(lexer->token, 0); lexer->state = IN_START; } diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index 78dfff2aa0..9f57ebf2bd 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -34,8 +34,8 @@ static void json_message_free_tokens(JSONMessageParser *parser) } } -static void json_message_process_token(JSONLexer *lexer, GString *input, - JSONTokenType type, int x, int y) +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y) { JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer); JSONToken *token; @@ -115,7 +115,7 @@ void json_message_parser_init(JSONMessageParser *parser, parser->tokens = g_queue_new(); parser->token_size = 0; - json_lexer_init(&parser->lexer, json_message_process_token); + json_lexer_init(&parser->lexer); } void json_message_parser_feed(JSONMessageParser *parser, -- cgit 1.4.1 From 61030280ca2d67bd63cb068250aee55849cd38ca Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:04 +0200 Subject: json: Rename token JSON_ESCAPE & friends to JSON_INTERP The JSON parser optionally supports interpolation. The code calls it "escape". Awkward, because it uses the same term for escape sequences within strings. The latter usage is consistent with RFC 8259 "The JavaScript Object Notation (JSON) Data Interchange Format" and ISO C. Call the former "interpolation" instead. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-38-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 2 +- qobject/json-lexer.c | 64 +++++++++++++++++++++---------------------- qobject/json-parser.c | 8 +++--- 3 files changed, 37 insertions(+), 37 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index 44bcf2ca64..8bce6ef676 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -27,7 +27,7 @@ typedef enum json_token_type { JSON_FLOAT, JSON_KEYWORD, JSON_STRING, - JSON_ESCAPE, + JSON_INTERP, JSON_SKIP, JSON_ERROR, } JSONTokenType; diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 17272a3874..5436809be6 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -115,12 +115,12 @@ enum json_lexer_state { IN_NONZERO_NUMBER, IN_NEG_NONZERO_NUMBER, IN_KEYWORD, - IN_ESCAPE, - IN_ESCAPE_L, - IN_ESCAPE_LL, - IN_ESCAPE_I, - IN_ESCAPE_I6, - IN_ESCAPE_I64, + IN_INTERP, + IN_INTERP_L, + IN_INTERP_LL, + IN_INTERP_I, + IN_INTERP_I6, + IN_INTERP_I64, IN_WHITESPACE, IN_START, }; @@ -221,40 +221,40 @@ static const uint8_t json_lexer[][256] = { ['\n'] = IN_WHITESPACE, }, - /* escape */ - [IN_ESCAPE_LL] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, + /* interpolation */ + [IN_INTERP_LL] = { + ['d'] = JSON_INTERP, + ['u'] = JSON_INTERP, }, - [IN_ESCAPE_L] = { - ['d'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_LL, - ['u'] = JSON_ESCAPE, + [IN_INTERP_L] = { + ['d'] = JSON_INTERP, + ['l'] = IN_INTERP_LL, + ['u'] = JSON_INTERP, }, - [IN_ESCAPE_I64] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, + [IN_INTERP_I64] = { + ['d'] = JSON_INTERP, + ['u'] = JSON_INTERP, }, - [IN_ESCAPE_I6] = { - ['4'] = IN_ESCAPE_I64, + [IN_INTERP_I6] = { + ['4'] = IN_INTERP_I64, }, - [IN_ESCAPE_I] = { - ['6'] = IN_ESCAPE_I6, + [IN_INTERP_I] = { + ['6'] = IN_INTERP_I6, }, - [IN_ESCAPE] = { - ['d'] = JSON_ESCAPE, - ['i'] = JSON_ESCAPE, - ['p'] = JSON_ESCAPE, - ['s'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - ['f'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_L, - ['I'] = IN_ESCAPE_I, + [IN_INTERP] = { + ['d'] = JSON_INTERP, + ['i'] = JSON_INTERP, + ['p'] = JSON_INTERP, + ['s'] = JSON_INTERP, + ['u'] = JSON_INTERP, + ['f'] = JSON_INTERP, + ['l'] = IN_INTERP_L, + ['I'] = IN_INTERP_I, }, /* top level rule */ @@ -271,7 +271,7 @@ static const uint8_t json_lexer[][256] = { [','] = JSON_COMMA, [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, - ['%'] = IN_ESCAPE, + ['%'] = IN_INTERP, [' '] = IN_WHITESPACE, ['\t'] = IN_WHITESPACE, ['\r'] = IN_WHITESPACE, @@ -311,7 +311,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) case JSON_RSQUARE: case JSON_COLON: case JSON_COMMA: - case JSON_ESCAPE: + case JSON_INTERP: case JSON_INTEGER: case JSON_FLOAT: case JSON_KEYWORD: diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 06aff19a5d..864cb578d8 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -423,7 +423,7 @@ static QObject *parse_keyword(JSONParserContext *ctxt) return NULL; } -static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) { JSONToken *token; @@ -432,7 +432,7 @@ static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) } token = parser_context_pop_token(ctxt); - assert(token && token->type == JSON_ESCAPE); + assert(token && token->type == JSON_INTERP); if (!strcmp(token->str, "%p")) { return va_arg(*ap, QObject *); @@ -527,8 +527,8 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) return parse_object(ctxt, ap); case JSON_LSQUARE: return parse_array(ctxt, ap); - case JSON_ESCAPE: - return parse_escape(ctxt, ap); + case JSON_INTERP: + return parse_interpolation(ctxt, ap); case JSON_INTEGER: case JSON_FLOAT: case JSON_STRING: -- cgit 1.4.1 From 2cbd15aa6f4d4694376dd0d231d56e572ac870c1 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:05 +0200 Subject: json: Treat unwanted interpolation as lexical error The JSON parser optionally supports interpolation. The lexer recognizes interpolation tokens unconditionally. The parser rejects them when interpolation is disabled, in parse_interpolation(). However, it neglects to set an error then, which can make json_parser_parse() fail without setting an error. Move the check for unwanted interpolation from the parser's parse_interpolation() into the lexer's finite state machine. When interpolation is disabled, '%' is now handled like any other unexpected character. The next commit will improve how such lexical errors are handled. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-39-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 4 ++-- qobject/json-lexer.c | 30 ++++++++++++++++++------------ qobject/json-parser.c | 4 ---- qobject/json-streamer.c | 2 +- tests/qmp-test.c | 4 ++++ 5 files changed, 25 insertions(+), 19 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index 8bce6ef676..afa84cb910 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -33,12 +33,12 @@ typedef enum json_token_type { } JSONTokenType; typedef struct JSONLexer { - int state; + int start_state, state; GString *token; int x, y; } JSONLexer; -void json_lexer_init(JSONLexer *lexer); +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 5436809be6..96fe13621d 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -92,7 +92,7 @@ * Like double-quoted strings, except they're delimited by %x27 * (apostrophe) instead of %x22 (quotation mark), and can't contain * unescaped apostrophe, but can contain unescaped quotation mark. - * - Interpolation: + * - Interpolation, if enabled: * interpolation = %((l|ll|I64)[du]|[ipsf]) * * Note: @@ -123,9 +123,11 @@ enum json_lexer_state { IN_INTERP_I64, IN_WHITESPACE, IN_START, + IN_START_INTERP, /* must be IN_START + 1 */ }; -QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); +QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP); +QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1); #define TERMINAL(state) [0 ... 0x7F] = (state) @@ -257,8 +259,12 @@ static const uint8_t json_lexer[][256] = { ['I'] = IN_INTERP_I, }, - /* top level rule */ - [IN_START] = { + /* + * Two start states: + * - IN_START recognizes JSON tokens with our string extensions + * - IN_START_INTERP additionally recognizes interpolation. + */ + [IN_START ... IN_START_INTERP] = { ['"'] = IN_DQ_STRING, ['\''] = IN_SQ_STRING, ['0'] = IN_ZERO, @@ -271,17 +277,18 @@ static const uint8_t json_lexer[][256] = { [','] = JSON_COMMA, [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, - ['%'] = IN_INTERP, [' '] = IN_WHITESPACE, ['\t'] = IN_WHITESPACE, ['\r'] = IN_WHITESPACE, ['\n'] = IN_WHITESPACE, }, + [IN_START_INTERP]['%'] = IN_INTERP, }; -void json_lexer_init(JSONLexer *lexer) +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation) { - lexer->state = IN_START; + lexer->start_state = lexer->state = enable_interpolation + ? IN_START_INTERP : IN_START; lexer->token = g_string_sized_new(3); lexer->x = lexer->y = 0; } @@ -321,7 +328,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) /* fall through */ case JSON_SKIP: g_string_truncate(lexer->token, 0); - new_state = IN_START; + new_state = lexer->start_state; break; case IN_ERROR: /* XXX: To avoid having previous bad input leaving the parser in an @@ -340,8 +347,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) json_message_process_token(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - new_state = IN_START; - lexer->state = new_state; + lexer->state = lexer->start_state; return; default: break; @@ -356,7 +362,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) json_message_process_token(lexer, lexer->token, lexer->state, lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - lexer->state = IN_START; + lexer->state = lexer->start_state; } } @@ -371,7 +377,7 @@ void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) void json_lexer_flush(JSONLexer *lexer) { - if (lexer->state != IN_START) { + if (lexer->state != lexer->start_state) { json_lexer_feed_char(lexer, 0, true); } } diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 864cb578d8..2855eaaeca 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -427,10 +427,6 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) { JSONToken *token; - if (ap == NULL) { - return NULL; - } - token = parser_context_pop_token(ctxt); assert(token && token->type == JSON_INTERP); diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index fa595a8761..a373e0114a 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -115,7 +115,7 @@ void json_message_parser_init(JSONMessageParser *parser, parser->tokens = g_queue_new(); parser->token_size = 0; - json_lexer_init(&parser->lexer); + json_lexer_init(&parser->lexer, !!ap); } void json_message_parser_feed(JSONMessageParser *parser, diff --git a/tests/qmp-test.c b/tests/qmp-test.c index 7b3ba17c4a..4ae2245484 100644 --- a/tests/qmp-test.c +++ b/tests/qmp-test.c @@ -94,6 +94,10 @@ static void test_malformed(QTestState *qts) /* lexical error: interpolation */ qtest_qmp_send_raw(qts, "%%p\n"); + /* two errors, one for "%", one for "p" */ + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); resp = qtest_qmp_receive(qts); g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); qobject_unref(resp); -- cgit 1.4.1 From 84a56f38b23440cb3127eaffe4e495826a29f18c Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:06 +0200 Subject: json: Pass lexical errors and limit violations to callback The callback to consume JSON values takes QObject *json, Error *err. If both are null, the callback is supposed to make up an error by itself. This sucks. qjson.c's consume_json() neglects to do so, which makes qobject_from_json() null instead of failing. I consider that a bug. The culprit is json_message_process_token(): it passes two null pointers when it runs into a lexical error or a limit violation. Fix it to pass a proper Error object then. Update the callbacks: * monitor.c's handle_qmp_command(): the code to make up an error is now dead, drop it. * qga/main.c's process_event(): lumps the "both null" case together with the "not a JSON object" case. The former is now gone. The error message "Invalid JSON syntax" is misleading for the latter. Improve it to "Input must be a JSON object". * qobject/qjson.c's consume_json(): no update; check-qjson demonstrates qobject_from_json() now sets an error on lexical errors, but still doesn't on some other errors. * tests/libqtest.c's qmp_response(): the Error object is now reliable, so use it to improve the error message. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-40-armbru@redhat.com> --- include/qapi/qmp/qerror.h | 3 --- monitor.c | 5 +---- qga/main.c | 3 ++- qobject/json-lexer.c | 3 +-- qobject/json-streamer.c | 22 ++++++++++++++++------ tests/check-qjson.c | 15 ++++++++------- tests/libqtest.c | 7 +++++-- 7 files changed, 33 insertions(+), 25 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h index c82360f429..145571f618 100644 --- a/include/qapi/qmp/qerror.h +++ b/include/qapi/qmp/qerror.h @@ -61,9 +61,6 @@ #define QERR_IO_ERROR \ "An IO error has occurred" -#define QERR_JSON_PARSING \ - "Invalid JSON syntax" - #define QERR_MIGRATION_ACTIVE \ "There's a migration process in progress" diff --git a/monitor.c b/monitor.c index 08f799a7bb..3dbdcb5190 100644 --- a/monitor.c +++ b/monitor.c @@ -4262,10 +4262,7 @@ static void handle_qmp_command(void *opaque, QObject *req, Error *err) QDict *qdict; QMPRequest *req_obj; - if (!req && !err) { - /* json_parser_parse() sucks: can fail without setting @err */ - error_setg(&err, QERR_JSON_PARSING); - } + assert(!req != !err); qdict = qobject_to(QDict, req); if (qdict) { diff --git a/qga/main.c b/qga/main.c index 2fc49d00d8..b74e1241ef 100644 --- a/qga/main.c +++ b/qga/main.c @@ -603,12 +603,13 @@ static void process_event(void *opaque, QObject *obj, Error *err) int ret; g_debug("process_event: called"); + assert(!obj != !err); if (err) { goto err; } req = qobject_to(QDict, obj); if (!req) { - error_setg(&err, QERR_JSON_PARSING); + error_setg(&err, "Input must be a JSON object"); goto err; } if (!qdict_haskey(req, "execute")) { diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 96fe13621d..7c31c2c8ff 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -334,8 +334,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) /* XXX: To avoid having previous bad input leaving the parser in an * unresponsive state where we consume unpredictable amounts of * subsequent "good" input, percolate this error state up to the - * tokenizer/parser by forcing a NULL object to be emitted, then - * reset state. + * parser by emitting a JSON_ERROR token, then reset lexer state. * * Also note that this handling is required for reliable channel * negotiation between QMP and the guest agent, since chr(0xFF) diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index a373e0114a..e372ecc895 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" +#include "qapi/error.h" #include "qapi/qmp/json-lexer.h" #include "qapi/qmp/json-parser.h" #include "qapi/qmp/json-streamer.h" @@ -57,6 +58,7 @@ void json_message_process_token(JSONLexer *lexer, GString *input, parser->bracket_count--; break; case JSON_ERROR: + error_setg(&err, "JSON parse error, stray '%s'", input->str); goto out_emit; default: break; @@ -82,12 +84,20 @@ void json_message_process_token(JSONLexer *lexer, GString *input, goto out_emit; } - if (parser->token_size > MAX_TOKEN_SIZE || - g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT || - parser->bracket_count + parser->brace_count > MAX_NESTING) { - /* Security consideration, we limit total memory allocated per object - * and the maximum recursion depth that a message can force. - */ + /* + * Security consideration, we limit total memory allocated per object + * and the maximum recursion depth that a message can force. + */ + if (parser->token_size > MAX_TOKEN_SIZE) { + error_setg(&err, "JSON token size limit exceeded"); + goto out_emit; + } + if (g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT) { + error_setg(&err, "JSON token count limit exceeded"); + goto out_emit; + } + if (parser->bracket_count + parser->brace_count > MAX_NESTING) { + error_setg(&err, "JSON nesting depth limit exceeded"); goto out_emit; } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 604886a1a2..d6fda0786f 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1021,6 +1021,7 @@ static void interpolation_unknown(void) } g_test_trap_subprocess(NULL, 0, 0); g_test_trap_assert_failed(); + g_test_trap_assert_stderr("*Unexpected error*stray '%x'*"); } static void interpolation_string(void) @@ -1296,11 +1297,11 @@ static void junk_input(void) QObject *obj; obj = qobject_from_json("@", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); obj = qobject_from_json("{\x01", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); obj = qobject_from_json("[0\xFF]", &err); @@ -1308,11 +1309,11 @@ static void junk_input(void) g_assert(obj == NULL); obj = qobject_from_json("00", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); obj = qobject_from_json("[1e", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); obj = qobject_from_json("truer", &err); @@ -1324,7 +1325,7 @@ static void unterminated_string(void) { Error *err = NULL; QObject *obj = qobject_from_json("\"abc", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1332,7 +1333,7 @@ static void unterminated_sq_string(void) { Error *err = NULL; QObject *obj = qobject_from_json("'abc", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1340,7 +1341,7 @@ static void unterminated_escape(void) { Error *err = NULL; QObject *obj = qobject_from_json("\"abc\\\"", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } diff --git a/tests/libqtest.c b/tests/libqtest.c index 1f3b0cb1b1..5973a67652 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -450,8 +450,11 @@ static void qmp_response(void *opaque, QObject *obj, Error *err) { QMPResponseParser *qmp = opaque; - if (!obj) { - fprintf(stderr, "QMP JSON response parsing failed\n"); + assert(!obj != !err); + + if (err) { + error_prepend(&err, "QMP JSON response parsing failed: "); + error_report_err(err); abort(); } -- cgit 1.4.1 From f7617d45d4652ae10d38bd0c917d7488d155cccb Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:07 +0200 Subject: json: Leave rejecting invalid interpolation to parser Both lexer and parser reject invalid interpolation specifications. The parser's check is useless. The lexer ends the token right after the first bad character. This tends to lead to suboptimal error reporting. For instance, input [ %04d ] produces the tokens JSON_LSQUARE [ JSON_ERROR %0 JSON_INTEGER 4 JSON_KEYWORD d JSON_RSQUARE ] The parser then yields an error, an object and two more errors: error: Invalid JSON syntax object: 4 error: JSON parse error, invalid keyword error: JSON parse error, expecting value Dumb down the lexer to accept [A-Za-z0-9]*. The parser's check is now used. Emit a proper error there. The lexer now produces JSON_LSQUARE [ JSON_INTERP %04d JSON_RSQUARE ] and the parser reports just JSON parse error, invalid interpolation '%04d' Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-41-armbru@redhat.com> --- qobject/json-lexer.c | 44 ++++++-------------------------------------- qobject/json-parser.c | 1 + tests/check-qjson.c | 3 ++- 3 files changed, 9 insertions(+), 39 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 7c31c2c8ff..f1a4b5a430 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -93,7 +93,8 @@ * (apostrophe) instead of %x22 (quotation mark), and can't contain * unescaped apostrophe, but can contain unescaped quotation mark. * - Interpolation, if enabled: - * interpolation = %((l|ll|I64)[du]|[ipsf]) + * The lexer accepts %[A-Za-z0-9]*, and leaves rejecting invalid + * ones to the parser. * * Note: * - Input must be encoded in modified UTF-8. @@ -116,11 +117,6 @@ enum json_lexer_state { IN_NEG_NONZERO_NUMBER, IN_KEYWORD, IN_INTERP, - IN_INTERP_L, - IN_INTERP_LL, - IN_INTERP_I, - IN_INTERP_I6, - IN_INTERP_I64, IN_WHITESPACE, IN_START, IN_START_INTERP, /* must be IN_START + 1 */ @@ -224,39 +220,11 @@ static const uint8_t json_lexer[][256] = { }, /* interpolation */ - [IN_INTERP_LL] = { - ['d'] = JSON_INTERP, - ['u'] = JSON_INTERP, - }, - - [IN_INTERP_L] = { - ['d'] = JSON_INTERP, - ['l'] = IN_INTERP_LL, - ['u'] = JSON_INTERP, - }, - - [IN_INTERP_I64] = { - ['d'] = JSON_INTERP, - ['u'] = JSON_INTERP, - }, - - [IN_INTERP_I6] = { - ['4'] = IN_INTERP_I64, - }, - - [IN_INTERP_I] = { - ['6'] = IN_INTERP_I6, - }, - [IN_INTERP] = { - ['d'] = JSON_INTERP, - ['i'] = JSON_INTERP, - ['p'] = JSON_INTERP, - ['s'] = JSON_INTERP, - ['u'] = JSON_INTERP, - ['f'] = JSON_INTERP, - ['l'] = IN_INTERP_L, - ['I'] = IN_INTERP_I, + TERMINAL(JSON_INTERP), + ['A' ... 'Z'] = IN_INTERP, + ['a' ... 'z'] = IN_INTERP, + ['0' ... '9'] = IN_INTERP, }, /* diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 2855eaaeca..e61cee9e8a 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -453,6 +453,7 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap) } else if (!strcmp(token->str, "%f")) { return QOBJECT(qnum_from_double(va_arg(*ap, double))); } + parse_error(ctxt, token, "invalid interpolation '%s'", token->str); return NULL; } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index d6fda0786f..83f8a0e6e3 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1021,7 +1021,8 @@ static void interpolation_unknown(void) } g_test_trap_subprocess(NULL, 0, 0); g_test_trap_assert_failed(); - g_test_trap_assert_stderr("*Unexpected error*stray '%x'*"); + g_test_trap_assert_stderr("*Unexpected error*" + "invalid interpolation '%x'*"); } static void interpolation_string(void) -- cgit 1.4.1 From 4d40066142278a7a9cfbde1ae481e0e94a4d24c3 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:09 +0200 Subject: json: Improve names of lexer states related to numbers Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-43-armbru@redhat.com> --- qobject/json-lexer.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index f1a4b5a430..01417dca9d 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -108,13 +108,13 @@ enum json_lexer_state { IN_SQ_STRING_ESCAPE, IN_SQ_STRING, IN_ZERO, - IN_DIGITS, - IN_DIGIT, + IN_EXP_DIGITS, + IN_EXP_SIGN, IN_EXP_E, IN_MANTISSA, IN_MANTISSA_DIGITS, - IN_NONZERO_NUMBER, - IN_NEG_NONZERO_NUMBER, + IN_DIGITS, + IN_SIGN, IN_KEYWORD, IN_INTERP, IN_WHITESPACE, @@ -164,19 +164,19 @@ static const uint8_t json_lexer[][256] = { }, /* Float */ - [IN_DIGITS] = { + [IN_EXP_DIGITS] = { TERMINAL(JSON_FLOAT), - ['0' ... '9'] = IN_DIGITS, + ['0' ... '9'] = IN_EXP_DIGITS, }, - [IN_DIGIT] = { - ['0' ... '9'] = IN_DIGITS, + [IN_EXP_SIGN] = { + ['0' ... '9'] = IN_EXP_DIGITS, }, [IN_EXP_E] = { - ['-'] = IN_DIGIT, - ['+'] = IN_DIGIT, - ['0' ... '9'] = IN_DIGITS, + ['-'] = IN_EXP_SIGN, + ['+'] = IN_EXP_SIGN, + ['0' ... '9'] = IN_EXP_DIGITS, }, [IN_MANTISSA_DIGITS] = { @@ -191,17 +191,17 @@ static const uint8_t json_lexer[][256] = { }, /* Number */ - [IN_NONZERO_NUMBER] = { + [IN_DIGITS] = { TERMINAL(JSON_INTEGER), - ['0' ... '9'] = IN_NONZERO_NUMBER, + ['0' ... '9'] = IN_DIGITS, ['e'] = IN_EXP_E, ['E'] = IN_EXP_E, ['.'] = IN_MANTISSA, }, - [IN_NEG_NONZERO_NUMBER] = { + [IN_SIGN] = { ['0'] = IN_ZERO, - ['1' ... '9'] = IN_NONZERO_NUMBER, + ['1' ... '9'] = IN_DIGITS, }, /* keywords */ @@ -236,8 +236,8 @@ static const uint8_t json_lexer[][256] = { ['"'] = IN_DQ_STRING, ['\''] = IN_SQ_STRING, ['0'] = IN_ZERO, - ['1' ... '9'] = IN_NONZERO_NUMBER, - ['-'] = IN_NEG_NONZERO_NUMBER, + ['1' ... '9'] = IN_DIGITS, + ['-'] = IN_SIGN, ['{'] = JSON_LCURLY, ['}'] = JSON_RCURLY, ['['] = JSON_LSQUARE, -- cgit 1.4.1 From f9277915ee7b2654f5347c4c261c8a0651fdd561 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:12 +0200 Subject: json: Fix streamer not to ignore trailing unterminated structures json_message_process_token() accumulates tokens until it got the sequence of tokens that comprise a single JSON value (it counts curly braces and square brackets to decide). It feeds those token sequences to json_parser_parse(). If a non-empty sequence of tokens remains at the end of the parse, it's silently ignored. check-qjson.c cases unterminated_array(), unterminated_array_comma(), unterminated_dict(), unterminated_dict_comma() demonstrate this bug. Fix as follows. Introduce a JSON_END_OF_INPUT token. When the streamer receives it, it feeds the accumulated tokens to json_parser_parse(). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-46-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 1 + qobject/json-lexer.c | 2 ++ qobject/json-streamer.c | 8 ++++++++ tests/check-qjson.c | 8 ++++---- 4 files changed, 15 insertions(+), 4 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h index afa84cb910..508fc7bdaf 100644 --- a/include/qapi/qmp/json-lexer.h +++ b/include/qapi/qmp/json-lexer.h @@ -30,6 +30,7 @@ typedef enum json_token_type { JSON_INTERP, JSON_SKIP, JSON_ERROR, + JSON_END_OF_INPUT, } JSONTokenType; typedef struct JSONLexer { diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 01417dca9d..a728c32faa 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -347,6 +347,8 @@ void json_lexer_flush(JSONLexer *lexer) if (lexer->state != lexer->start_state) { json_lexer_feed_char(lexer, 0, true); } + json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT, + lexer->x, lexer->y); } void json_lexer_destroy(JSONLexer *lexer) diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index e372ecc895..674dfe6e85 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -60,6 +60,13 @@ void json_message_process_token(JSONLexer *lexer, GString *input, case JSON_ERROR: error_setg(&err, "JSON parse error, stray '%s'", input->str); goto out_emit; + case JSON_END_OF_INPUT: + if (g_queue_is_empty(parser->tokens)) { + return; + } + json = json_parser_parse(parser->tokens, parser->ap, &err); + parser->tokens = NULL; + goto out_emit; default: break; } @@ -137,6 +144,7 @@ void json_message_parser_feed(JSONMessageParser *parser, void json_message_parser_flush(JSONMessageParser *parser) { json_lexer_flush(&parser->lexer); + assert(g_queue_is_empty(parser->tokens)); } void json_message_parser_destroy(JSONMessageParser *parser) diff --git a/tests/check-qjson.c b/tests/check-qjson.c index f9438370d9..0ca4b3c823 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1360,7 +1360,7 @@ static void unterminated_array(void) { Error *err = NULL; QObject *obj = qobject_from_json("[32", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1368,7 +1368,7 @@ static void unterminated_array_comma(void) { Error *err = NULL; QObject *obj = qobject_from_json("[32,", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1384,7 +1384,7 @@ static void unterminated_dict(void) { Error *err = NULL; QObject *obj = qobject_from_json("{'abc':32", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1392,7 +1392,7 @@ static void unterminated_dict_comma(void) { Error *err = NULL; QObject *obj = qobject_from_json("{'abc':32,", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } -- cgit 1.4.1 From 812ce33eadfc419795a5d1c7fe8f487fdfe2188c Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:19 +0200 Subject: qobject: Drop superfluous includes of qemu-common.h Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-53-armbru@redhat.com> --- include/qapi/qmp/json-parser.h | 1 - qobject/json-lexer.c | 1 - qobject/json-streamer.c | 1 - qobject/qbool.c | 1 - qobject/qlist.c | 1 - qobject/qnull.c | 1 - qobject/qnum.c | 1 - qobject/qobject.c | 1 - qobject/qstring.c | 1 - 9 files changed, 9 deletions(-) (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h index 21b23d7bec..55f75954c3 100644 --- a/include/qapi/qmp/json-parser.h +++ b/include/qapi/qmp/json-parser.h @@ -14,7 +14,6 @@ #ifndef QEMU_JSON_PARSER_H #define QEMU_JSON_PARSER_H -#include "qemu-common.h" #include "qapi/qmp/json-lexer.h" typedef struct JSONToken JSONToken; diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index a728c32faa..06ec67dc45 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -12,7 +12,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/qmp/json-lexer.h" #include "qapi/qmp/json-streamer.h" diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index 467bc29413..da53e770e9 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -12,7 +12,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/error.h" #include "qapi/qmp/json-lexer.h" #include "qapi/qmp/json-parser.h" diff --git a/qobject/qbool.c b/qobject/qbool.c index b58249925c..06dfc43498 100644 --- a/qobject/qbool.c +++ b/qobject/qbool.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qbool.h" -#include "qemu-common.h" /** * qbool_from_bool(): Create a new QBool from a bool diff --git a/qobject/qlist.c b/qobject/qlist.c index 37c1c167f1..b3274af88b 100644 --- a/qobject/qlist.c +++ b/qobject/qlist.c @@ -17,7 +17,6 @@ #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" #include "qemu/queue.h" -#include "qemu-common.h" /** * qlist_new(): Create a new QList diff --git a/qobject/qnull.c b/qobject/qnull.c index f6f55f11ea..00870a1824 100644 --- a/qobject/qnull.c +++ b/qobject/qnull.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/qmp/qnull.h" QNull qnull_ = { diff --git a/qobject/qnum.c b/qobject/qnum.c index 1501c82832..7012fc57f2 100644 --- a/qobject/qnum.c +++ b/qobject/qnum.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qnum.h" -#include "qemu-common.h" /** * qnum_from_int(): Create a new QNum from an int64_t diff --git a/qobject/qobject.c b/qobject/qobject.c index cf4b7e229e..878dd76e79 100644 --- a/qobject/qobject.c +++ b/qobject/qobject.c @@ -8,7 +8,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" diff --git a/qobject/qstring.c b/qobject/qstring.c index 0f1510e792..1c6897df00 100644 --- a/qobject/qstring.c +++ b/qobject/qstring.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qstring.h" -#include "qemu-common.h" /** * qstring_new(): Create a new empty QString -- cgit 1.4.1 From 86cdf9ec8dec2763702cc52fa412d108a5dc9608 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 23 Aug 2018 18:40:20 +0200 Subject: json: Clean up headers The JSON parser has three public headers, json-lexer.h, json-parser.h, json-streamer.h. They all contain stuff that is of no interest outside qobject/json-*.c. Collect the public interface in include/qapi/qmp/json-parser.h, and everything else in qobject/json-parser-int.h. Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Message-Id: <20180823164025.12553-54-armbru@redhat.com> --- include/qapi/qmp/json-lexer.h | 50 ------------------------------------- include/qapi/qmp/json-parser.h | 36 +++++++++++++++++++++------ include/qapi/qmp/json-streamer.h | 46 ---------------------------------- monitor.c | 2 +- qga/main.c | 2 +- qobject/json-lexer.c | 3 +-- qobject/json-parser-int.h | 54 ++++++++++++++++++++++++++++++++++++++++ qobject/json-parser.c | 4 +-- qobject/json-streamer.c | 4 +-- qobject/qjson.c | 2 +- tests/libqtest.c | 2 +- 11 files changed, 90 insertions(+), 115 deletions(-) delete mode 100644 include/qapi/qmp/json-lexer.h delete mode 100644 include/qapi/qmp/json-streamer.h create mode 100644 qobject/json-parser-int.h (limited to 'qobject/json-lexer.c') diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h deleted file mode 100644 index 508fc7bdaf..0000000000 --- a/include/qapi/qmp/json-lexer.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * JSON lexer - * - * Copyright IBM, Corp. 2009 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef QEMU_JSON_LEXER_H -#define QEMU_JSON_LEXER_H - - -typedef enum json_token_type { - JSON_MIN = 100, - JSON_LCURLY = JSON_MIN, - JSON_RCURLY, - JSON_LSQUARE, - JSON_RSQUARE, - JSON_COLON, - JSON_COMMA, - JSON_INTEGER, - JSON_FLOAT, - JSON_KEYWORD, - JSON_STRING, - JSON_INTERP, - JSON_SKIP, - JSON_ERROR, - JSON_END_OF_INPUT, -} JSONTokenType; - -typedef struct JSONLexer { - int start_state, state; - GString *token; - int x, y; -} JSONLexer; - -void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); - -void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); - -void json_lexer_flush(JSONLexer *lexer); - -void json_lexer_destroy(JSONLexer *lexer); - -#endif diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h index 55f75954c3..7345a9bd5c 100644 --- a/include/qapi/qmp/json-parser.h +++ b/include/qapi/qmp/json-parser.h @@ -1,5 +1,5 @@ /* - * JSON Parser + * JSON Parser * * Copyright IBM, Corp. 2009 * @@ -11,14 +11,36 @@ * */ -#ifndef QEMU_JSON_PARSER_H -#define QEMU_JSON_PARSER_H +#ifndef QAPI_QMP_JSON_PARSER_H +#define QAPI_QMP_JSON_PARSER_H -#include "qapi/qmp/json-lexer.h" +typedef struct JSONLexer { + int start_state, state; + GString *token; + int x, y; +} JSONLexer; -typedef struct JSONToken JSONToken; +typedef struct JSONMessageParser { + void (*emit)(void *opaque, QObject *json, Error *err); + void *opaque; + va_list *ap; + JSONLexer lexer; + int brace_count; + int bracket_count; + GQueue tokens; + uint64_t token_size; +} JSONMessageParser; -JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr); -QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); +void json_message_parser_init(JSONMessageParser *parser, + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap); + +void json_message_parser_feed(JSONMessageParser *parser, + const char *buffer, size_t size); + +void json_message_parser_flush(JSONMessageParser *parser); + +void json_message_parser_destroy(JSONMessageParser *parser); #endif diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h deleted file mode 100644 index 29950ac37c..0000000000 --- a/include/qapi/qmp/json-streamer.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * JSON streaming support - * - * Copyright IBM, Corp. 2009 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef QEMU_JSON_STREAMER_H -#define QEMU_JSON_STREAMER_H - -#include "qapi/qmp/json-lexer.h" - -typedef struct JSONMessageParser -{ - void (*emit)(void *opaque, QObject *json, Error *err); - void *opaque; - va_list *ap; - JSONLexer lexer; - int brace_count; - int bracket_count; - GQueue tokens; - uint64_t token_size; -} JSONMessageParser; - -void json_message_process_token(JSONLexer *lexer, GString *input, - JSONTokenType type, int x, int y); - -void json_message_parser_init(JSONMessageParser *parser, - void (*emit)(void *opaque, QObject *json, - Error *err), - void *opaque, va_list *ap); - -void json_message_parser_feed(JSONMessageParser *parser, - const char *buffer, size_t size); - -void json_message_parser_flush(JSONMessageParser *parser); - -void json_message_parser_destroy(JSONMessageParser *parser); - -#endif diff --git a/monitor.c b/monitor.c index 3dbdcb5190..021c11b1bf 100644 --- a/monitor.c +++ b/monitor.c @@ -58,7 +58,7 @@ #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" #include "qapi/qmp/qjson.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qlist.h" #include "qom/object_interfaces.h" #include "trace-root.h" diff --git a/qga/main.c b/qga/main.c index b74e1241ef..6d70242d05 100644 --- a/qga/main.c +++ b/qga/main.c @@ -18,7 +18,7 @@ #include #include #endif -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qstring.h" diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 06ec67dc45..e1745a3d95 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -12,8 +12,7 @@ */ #include "qemu/osdep.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) diff --git a/qobject/json-parser-int.h b/qobject/json-parser-int.h new file mode 100644 index 0000000000..ceaa890ec6 --- /dev/null +++ b/qobject/json-parser-int.h @@ -0,0 +1,54 @@ +/* + * JSON Parser + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef JSON_PARSER_INT_H +#define JSON_PARSER_INT_H + +#include "qapi/qmp/json-parser.h" + + +typedef enum json_token_type { + JSON_MIN = 100, + JSON_LCURLY = JSON_MIN, + JSON_RCURLY, + JSON_LSQUARE, + JSON_RSQUARE, + JSON_COLON, + JSON_COMMA, + JSON_INTEGER, + JSON_FLOAT, + JSON_KEYWORD, + JSON_STRING, + JSON_INTERP, + JSON_SKIP, + JSON_ERROR, + JSON_END_OF_INPUT, +} JSONTokenType; + +typedef struct JSONToken JSONToken; + +/* json-lexer.c */ +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); +void json_lexer_flush(JSONLexer *lexer); +void json_lexer_destroy(JSONLexer *lexer); + +/* json-streamer.c */ +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y); + +/* json-parser.c */ +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr); +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); + +#endif diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a247875f14..7449684f1c 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -22,9 +22,7 @@ #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" -#include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" struct JSONToken { JSONTokenType type; diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index da53e770e9..47dd7ea576 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -13,9 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) #define MAX_TOKEN_COUNT (2ULL << 20) diff --git a/qobject/qjson.c b/qobject/qjson.c index b9ccae2c2a..db36101f3b 100644 --- a/qobject/qjson.c +++ b/qobject/qjson.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qdict.h" diff --git a/tests/libqtest.c b/tests/libqtest.c index 5973a67652..d635c5bea0 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -24,7 +24,7 @@ #include "qemu-common.h" #include "qemu/cutils.h" #include "qapi/error.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qlist.h" -- cgit 1.4.1