mirror of
https://github.com/json-c/json-c.git
synced 2026-03-27 08:59:07 +08:00
Issue #616: Change the parsing of surrogate pairs in unicode escapes so it uses a couple of additional states instead of assuming the low surrogate is already present, to ensure that we correctly handle various cases of incremental parsing.
This commit is contained in:
@@ -68,8 +68,8 @@ static void single_incremental_parse(const char *test_string, int clear_serializ
|
||||
|
||||
if (strcmp(all_at_once_str, new_str) != 0)
|
||||
{
|
||||
printf("ERROR: failed to parse (%s) in %d byte chunks: %s != %s\n",
|
||||
test_string, chunksize, all_at_once_str, new_str);
|
||||
printf("ERROR: failed to parse (%s) in %d byte chunks: %s != %s\n", test_string,
|
||||
chunksize, all_at_once_str, new_str);
|
||||
}
|
||||
json_tokener_free(tok);
|
||||
}
|
||||
@@ -193,8 +193,8 @@ static void test_utf8_parse()
|
||||
// json_tokener_parse doesn't support checking for byte order marks.
|
||||
// It's the responsibility of the caller to detect and skip a BOM.
|
||||
// Both of these checks return null.
|
||||
char* utf8_bom = "\xEF\xBB\xBF";
|
||||
char* utf8_bom_and_chars = "\xEF\xBB\xBF{}";
|
||||
char *utf8_bom = "\xEF\xBB\xBF";
|
||||
char *utf8_bom_and_chars = "\xEF\xBB\xBF{}";
|
||||
single_basic_parse(utf8_bom, 0);
|
||||
single_basic_parse(utf8_bom_and_chars, 0);
|
||||
}
|
||||
@@ -245,7 +245,7 @@ struct incremental_step
|
||||
int char_offset;
|
||||
enum json_tokener_error expected_error;
|
||||
int reset_tokener; /* Set to 1 to call json_tokener_reset() after parsing */
|
||||
int tok_flags; /* JSON_TOKENER_* flags to pass to json_tokener_set_flags() */
|
||||
int tok_flags; /* JSON_TOKENER_* flags to pass to json_tokener_set_flags() */
|
||||
} incremental_steps[] = {
|
||||
|
||||
/* Check that full json messages can be parsed, both w/ and w/o a reset */
|
||||
@@ -268,7 +268,11 @@ struct incremental_step
|
||||
{"\": {\"bar", -1, -1, json_tokener_continue, 0},
|
||||
{"\":13}}", -1, -1, json_tokener_success, 1},
|
||||
|
||||
/* Check the UTF-16 surrogate pair */
|
||||
/* Check the UTF-16 surrogate pair handling in various ways.
|
||||
* Note: \ud834\udd1e is U+1D11E, Musical Symbol G Clef
|
||||
* Your terminal may not display these correctly, in particular
|
||||
* PuTTY doesn't currently show this character.
|
||||
*/
|
||||
/* parse one char at a time */
|
||||
{"\"\\", -1, -1, json_tokener_continue, 0},
|
||||
{"u", -1, -1, json_tokener_continue, 0},
|
||||
@@ -296,6 +300,16 @@ struct incremental_step
|
||||
{"udd1e\"", -1, -1, json_tokener_success, 1},
|
||||
{"\"\\ud834\\u", -1, -1, json_tokener_continue, 0},
|
||||
{"dd1e\"", -1, -1, json_tokener_success, 1},
|
||||
{"\"fff \\ud834\\ud", -1, -1, json_tokener_continue, 0},
|
||||
{"d1e bar\"", -1, -1, json_tokener_success, 1},
|
||||
{"\"fff \\ud834\\udd", -1, -1, json_tokener_continue, 0},
|
||||
{"1e bar\"", -1, -1, json_tokener_success, 1},
|
||||
|
||||
/* \ud83d\ude00 is U+1F600, Grinning Face
|
||||
* Displays fine in PuTTY, though you may need "less -r"
|
||||
*/
|
||||
{"\"fff \\ud83d\\ude", -1, -1, json_tokener_continue, 0},
|
||||
{"00 bar\"", -1, -1, json_tokener_success, 1},
|
||||
|
||||
/* Check that json_tokener_reset actually resets */
|
||||
{"{ \"foo", -1, -1, json_tokener_continue, 1},
|
||||
|
||||
@@ -124,6 +124,12 @@ json_tokener_parse_ex(tok, "\ud834\ , 8) ... OK: got correct error: continu
|
||||
json_tokener_parse_ex(tok, udd1e" , 6) ... OK: got object of type [string]: "𝄞"
|
||||
json_tokener_parse_ex(tok, "\ud834\u , 9) ... OK: got correct error: continue
|
||||
json_tokener_parse_ex(tok, dd1e" , 5) ... OK: got object of type [string]: "𝄞"
|
||||
json_tokener_parse_ex(tok, "fff \ud834\ud, 14) ... OK: got correct error: continue
|
||||
json_tokener_parse_ex(tok, d1e bar" , 8) ... OK: got object of type [string]: "fff 𝄞 bar"
|
||||
json_tokener_parse_ex(tok, "fff \ud834\udd, 15) ... OK: got correct error: continue
|
||||
json_tokener_parse_ex(tok, 1e bar" , 7) ... OK: got object of type [string]: "fff 𝄞 bar"
|
||||
json_tokener_parse_ex(tok, "fff \ud83d\ude, 15) ... OK: got correct error: continue
|
||||
json_tokener_parse_ex(tok, 00 bar" , 7) ... OK: got object of type [string]: "fff 😀 bar"
|
||||
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
|
||||
json_tokener_parse_ex(tok, : "bar"} , 8) ... OK: got correct error: unexpected character
|
||||
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
|
||||
@@ -240,5 +246,5 @@ json_tokener_parse_ex(tok, "\ud855
|
||||
json_tokener_parse_ex(tok, "\ud0031<33>" , 10) ... OK: got correct error: invalid utf-8 string
|
||||
json_tokener_parse_ex(tok, 11<31>11 , 5) ... OK: got correct error: invalid utf-8 string
|
||||
json_tokener_parse_ex(tok, {"1<>":1} , 8) ... OK: got correct error: invalid utf-8 string
|
||||
End Incremental Tests OK=154 ERROR=0
|
||||
End Incremental Tests OK=160 ERROR=0
|
||||
==================================
|
||||
|
||||
Reference in New Issue
Block a user