Issue #616: Change the parsing of surrogate pairs in unicode escapes so it uses a couple of additional states instead of assuming the low surrogate is already present, to ensure that we correctly handle various cases of incremental parsing.

This commit is contained in:
Eric Haszlakiewicz
2020-06-21 18:17:40 +00:00
parent 197e372464
commit a68566bf6a
5 changed files with 172 additions and 172 deletions

View File

@@ -68,8 +68,8 @@ static void single_incremental_parse(const char *test_string, int clear_serializ
if (strcmp(all_at_once_str, new_str) != 0)
{
printf("ERROR: failed to parse (%s) in %d byte chunks: %s != %s\n",
test_string, chunksize, all_at_once_str, new_str);
printf("ERROR: failed to parse (%s) in %d byte chunks: %s != %s\n", test_string,
chunksize, all_at_once_str, new_str);
}
json_tokener_free(tok);
}
@@ -193,8 +193,8 @@ static void test_utf8_parse()
// json_tokener_parse doesn't support checking for byte order marks.
// It's the responsibility of the caller to detect and skip a BOM.
// Both of these checks return null.
char* utf8_bom = "\xEF\xBB\xBF";
char* utf8_bom_and_chars = "\xEF\xBB\xBF{}";
char *utf8_bom = "\xEF\xBB\xBF";
char *utf8_bom_and_chars = "\xEF\xBB\xBF{}";
single_basic_parse(utf8_bom, 0);
single_basic_parse(utf8_bom_and_chars, 0);
}
@@ -245,7 +245,7 @@ struct incremental_step
int char_offset;
enum json_tokener_error expected_error;
int reset_tokener; /* Set to 1 to call json_tokener_reset() after parsing */
int tok_flags; /* JSON_TOKENER_* flags to pass to json_tokener_set_flags() */
int tok_flags; /* JSON_TOKENER_* flags to pass to json_tokener_set_flags() */
} incremental_steps[] = {
/* Check that full json messages can be parsed, both w/ and w/o a reset */
@@ -268,7 +268,11 @@ struct incremental_step
{"\": {\"bar", -1, -1, json_tokener_continue, 0},
{"\":13}}", -1, -1, json_tokener_success, 1},
/* Check the UTF-16 surrogate pair */
/* Check the UTF-16 surrogate pair handling in various ways.
* Note: \ud843\udd1e is u+1D11E, Musical Symbol G Clef
* Your terminal may not display these correctly, in particular
* PuTTY doesn't currently show this character.
*/
/* parse one char at every time */
{"\"\\", -1, -1, json_tokener_continue, 0},
{"u", -1, -1, json_tokener_continue, 0},
@@ -296,6 +300,16 @@ struct incremental_step
{"udd1e\"", -1, -1, json_tokener_success, 1},
{"\"\\ud834\\u", -1, -1, json_tokener_continue, 0},
{"dd1e\"", -1, -1, json_tokener_success, 1},
{"\"fff \\ud834\\ud", -1, -1, json_tokener_continue, 0},
{"d1e bar\"", -1, -1, json_tokener_success, 1},
{"\"fff \\ud834\\udd", -1, -1, json_tokener_continue, 0},
{"1e bar\"", -1, -1, json_tokener_success, 1},
/* \ud83d\ude00 is U+1F600, Grinning Face
* Displays fine in PuTTY, though you may need "less -r"
*/
{"\"fff \\ud83d\\ude", -1, -1, json_tokener_continue, 0},
{"00 bar\"", -1, -1, json_tokener_success, 1},
/* Check that json_tokener_reset actually resets */
{"{ \"foo", -1, -1, json_tokener_continue, 1},

View File

@@ -124,6 +124,12 @@ json_tokener_parse_ex(tok, "\ud834\ , 8) ... OK: got correct error: continu
json_tokener_parse_ex(tok, udd1e" , 6) ... OK: got object of type [string]: "𝄞"
json_tokener_parse_ex(tok, "\ud834\u , 9) ... OK: got correct error: continue
json_tokener_parse_ex(tok, dd1e" , 5) ... OK: got object of type [string]: "𝄞"
json_tokener_parse_ex(tok, "fff \ud834\ud, 14) ... OK: got correct error: continue
json_tokener_parse_ex(tok, d1e bar" , 8) ... OK: got object of type [string]: "fff 𝄞 bar"
json_tokener_parse_ex(tok, "fff \ud834\udd, 15) ... OK: got correct error: continue
json_tokener_parse_ex(tok, 1e bar" , 7) ... OK: got object of type [string]: "fff 𝄞 bar"
json_tokener_parse_ex(tok, "fff \ud83d\ude, 15) ... OK: got correct error: continue
json_tokener_parse_ex(tok, 00 bar" , 7) ... OK: got object of type [string]: "fff 😀 bar"
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
json_tokener_parse_ex(tok, : "bar"} , 8) ... OK: got correct error: unexpected character
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
@@ -240,5 +246,5 @@ json_tokener_parse_ex(tok, "\ud855
json_tokener_parse_ex(tok, "\ud0031<33>" , 10) ... OK: got correct error: invalid utf-8 string
json_tokener_parse_ex(tok, 11<31>11 , 5) ... OK: got correct error: invalid utf-8 string
json_tokener_parse_ex(tok, {"1<>":1} , 8) ... OK: got correct error: invalid utf-8 string
End Incremental Tests OK=154 ERROR=0
End Incremental Tests OK=160 ERROR=0
==================================