Merge pull request #531 from dota17/utf8test

validate utf-8 string
This commit is contained in:
Eric Haszlakiewicz
2020-02-05 21:56:06 -05:00
committed by GitHub
4 changed files with 125 additions and 5 deletions

View File

@@ -355,6 +355,39 @@ struct incremental_step {
{ "[1,2,3,]", -1, 7, json_tokener_error_parse_unexpected, 3 },
{ "{\"a\":1,}", -1, 7, json_tokener_error_parse_unexpected, 3 },
// utf-8 test
// acsll encoding
{ "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 5 },
{ "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 1 },
// utf-8 encoding
{ "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 5 },
{ "\x22\xe4\xb8",-1, 3, json_tokener_error_parse_utf8_string, 4 },
{ "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 5 },
{ "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 1 },
{ "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 5 },
{ "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 5 },
// wrong utf-8 encoding
{ "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 5 },
{ "\x22\xe6\x9d\x4e\x22",-1, 5, json_tokener_success, 1 },
// GBK encoding
{ "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 5 },
{ "\x22\xc0\xee\xc5\xf4\x22",-1, 6, json_tokener_success, 1 },
// char after space
{ "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 5 },
{ "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 5 },
{ "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 5 },
// char in state inf
{ "\x49\x6e\x66\x69\x6e\x69\x74\x79",9, 8, json_tokener_success, 1 },
{ "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 5 },
// char in escape unicode
{ "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 5 },
{ "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 5 },
{ "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 5 },
// char in number
{ "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 5 },
// char in object
{ "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 5 },
{ NULL, -1, -1, json_tokener_success, 0 },
};
@@ -389,9 +422,19 @@ static void test_incremental_parse()
size_t expected_char_offset;
if (step->reset_tokener & 2)
json_tokener_set_flags(tok, JSON_TOKENER_STRICT);
{
if (step->reset_tokener & 4)
json_tokener_set_flags(tok, 3);
else
json_tokener_set_flags(tok, JSON_TOKENER_STRICT);
}
else
json_tokener_set_flags(tok, 0);
{
if (step->reset_tokener & 4)
json_tokener_set_flags(tok, JSON_TOKENER_VALIDATE_UTF8);
else
json_tokener_set_flags(tok, 0);
}
if (length == -1)
length = strlen(step->string_to_parse);