mirror of
https://github.com/json-c/json-c.git
synced 2026-04-05 13:29:06 +08:00
test utf8
This commit is contained in:
@@ -83,6 +83,7 @@ static const char* json_tokener_errors[] = {
|
|||||||
"object value separator ',' expected",
|
"object value separator ',' expected",
|
||||||
"invalid string sequence",
|
"invalid string sequence",
|
||||||
"expected comment",
|
"expected comment",
|
||||||
|
"invalid utf-8 string",
|
||||||
"buffer size overflow"
|
"buffer size overflow"
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -222,8 +223,12 @@ struct json_object* json_tokener_parse_verbose(const char *str,
|
|||||||
: \
|
: \
|
||||||
(((tok)->err = json_tokener_continue), 0) \
|
(((tok)->err = json_tokener_continue), 0) \
|
||||||
) : \
|
) : \
|
||||||
(((dest) = *str), 1) \
|
(((tok->flags & JSON_TOKENER_STRICT) && \
|
||||||
)
|
(!json_tokener_validate_utf8(*str, nBytesp)))? \
|
||||||
|
((tok->err = json_tokener_error_parse_utf8_string), 0) \
|
||||||
|
: \
|
||||||
|
(((dest) = *str), 1) \
|
||||||
|
))
|
||||||
|
|
||||||
/* ADVANCE_CHAR() macro:
|
/* ADVANCE_CHAR() macro:
|
||||||
* Increments str & tok->char_offset.
|
* Increments str & tok->char_offset.
|
||||||
@@ -242,6 +247,9 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
|||||||
{
|
{
|
||||||
struct json_object *obj = NULL;
|
struct json_object *obj = NULL;
|
||||||
char c = '\1';
|
char c = '\1';
|
||||||
|
unsigned int nBytes = 0;
|
||||||
|
unsigned int *nBytesp = &nBytes;
|
||||||
|
|
||||||
#ifdef HAVE_USELOCALE
|
#ifdef HAVE_USELOCALE
|
||||||
locale_t oldlocale = uselocale(NULL);
|
locale_t oldlocale = uselocale(NULL);
|
||||||
locale_t newloc;
|
locale_t newloc;
|
||||||
@@ -948,6 +956,10 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
|||||||
} /* while(PEEK_CHAR) */
|
} /* while(PEEK_CHAR) */
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
if ((tok->flags & JSON_TOKENER_STRICT) && (nBytes != 0))
|
||||||
|
{
|
||||||
|
tok->err = json_tokener_error_parse_utf8_string;
|
||||||
|
}
|
||||||
if (c &&
|
if (c &&
|
||||||
(state == json_tokener_state_finish) &&
|
(state == json_tokener_state_finish) &&
|
||||||
(tok->depth == 0) &&
|
(tok->depth == 0) &&
|
||||||
@@ -985,6 +997,37 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Incrementally validate one byte of a UTF-8 encoded byte stream.
 *
 * The caller feeds bytes one at a time and keeps the state in *nBytes
 * between calls.
 *
 * @param c      the next input byte.
 * @param nBytes in/out: number of continuation bytes still expected for the
 *               multi-byte sequence in progress; 0 means c starts a new
 *               character.  Left untouched when 0 is returned.
 * @return 1 if c is acceptable at this position, 0 if it is not.
 */
json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
{
	/* Work on an unsigned copy so the range checks behave the same
	 * whether plain char is signed or unsigned. */
	unsigned char chr = (unsigned char)c;

	if (*nBytes != 0)
	{
		/* In the middle of a sequence: only 10xxxxxx may follow. */
		if ((chr & 0xC0) != 0x80)
			return 0;
		(*nBytes)--;
		return 1;
	}

	/* Single-byte (ASCII) character: nothing to track. */
	if (chr < 0x80)
		return 1;

	/* 0xFE and 0xFF are never valid in UTF-8, not even in the historic
	 * 5- and 6-byte forms that are still accepted below. */
	if (chr > 0xFD)
		return 0;

	/* Lead byte: record how many continuation bytes must follow
	 * (sequence length minus the lead byte itself). */
	if (chr >= 0xFC)
		*nBytes = 5;
	else if (chr >= 0xF8)
		*nBytes = 4;
	else if (chr >= 0xF0)
		*nBytes = 3;
	else if (chr >= 0xE0)
		*nBytes = 2;
	else if (chr >= 0xC0)
		*nBytes = 1;
	else
		return 0; /* 0x80..0xBF: continuation byte with no lead byte */

	return 1;
}
|
||||||
|
|
||||||
void json_tokener_set_flags(struct json_tokener *tok, int flags)
|
void json_tokener_set_flags(struct json_tokener *tok, int flags)
|
||||||
{
|
{
|
||||||
tok->flags = flags;
|
tok->flags = flags;
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ enum json_tokener_error {
|
|||||||
json_tokener_error_parse_object_value_sep,
|
json_tokener_error_parse_object_value_sep,
|
||||||
json_tokener_error_parse_string,
|
json_tokener_error_parse_string,
|
||||||
json_tokener_error_parse_comment,
|
json_tokener_error_parse_comment,
|
||||||
|
json_tokener_error_parse_utf8_string,
|
||||||
json_tokener_error_size
|
json_tokener_error_size
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -162,6 +163,11 @@ JSON_EXPORT void json_tokener_reset(struct json_tokener *tok);
|
|||||||
JSON_EXPORT struct json_object* json_tokener_parse(const char *str);
|
JSON_EXPORT struct json_object* json_tokener_parse(const char *str);
|
||||||
JSON_EXPORT struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error);
|
JSON_EXPORT struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate the UTF-8 string in strict mode.
|
||||||
|
* Returns 0 if the byte is not valid UTF-8 at this position, 1 otherwise.
|
||||||
|
*/
|
||||||
|
json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
|
||||||
/**
|
/**
|
||||||
* Set flags that control how parsing will be done.
|
* Set flags that control how parsing will be done.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -355,6 +355,41 @@ struct incremental_step {
|
|||||||
{ "[1,2,3,]", -1, 7, json_tokener_error_parse_unexpected, 3 },
|
{ "[1,2,3,]", -1, 7, json_tokener_error_parse_unexpected, 3 },
|
||||||
{ "{\"a\":1,}", -1, 7, json_tokener_error_parse_unexpected, 3 },
|
{ "{\"a\":1,}", -1, 7, json_tokener_error_parse_unexpected, 3 },
|
||||||
|
|
||||||
|
// utf-8 test
|
||||||
|
// ASCII encoding
|
||||||
|
{ "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 3 },
|
||||||
|
{ "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 1 },
|
||||||
|
// utf-8 encoding
|
||||||
|
{ "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 3 },
|
||||||
|
{ "\x22\xe4\xb8",-1, -1, json_tokener_error_parse_utf8_string, 2 },
|
||||||
|
{ "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
{ "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 1 },
|
||||||
|
{ "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 3 },
|
||||||
|
{ "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
|
||||||
|
{ "\x22\xf8\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
|
||||||
|
{ "\x22\xfd\xa5\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
|
||||||
|
// wrong utf-8 encoding
|
||||||
|
{ "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
{ "\x22\xe6\x9d\x4e\x22",-1, 5, json_tokener_success, 1 },
|
||||||
|
// GBK encoding
|
||||||
|
{ "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
{ "\x22\xc0\xee\xc5\xf4\x22",-1, 6, json_tokener_success, 1 },
|
||||||
|
// char after space
|
||||||
|
{ "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 3 },
|
||||||
|
{ "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
{ "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
// char in state inf
|
||||||
|
{ "\x49\x6e\x66\x69\x6e\x69\x74\x79",9, 8, json_tokener_success, 1 },
|
||||||
|
{ "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
// char in escape unicode
|
||||||
|
{ "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 3 },
|
||||||
|
{ "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
{ "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
// char in number
|
||||||
|
{ "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
// char in object
|
||||||
|
{ "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 3 },
|
||||||
|
|
||||||
{ NULL, -1, -1, json_tokener_success, 0 },
|
{ NULL, -1, -1, json_tokener_success, 0 },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -183,5 +183,29 @@ json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got object of type [array]
|
|||||||
json_tokener_parse_ex(tok, [1,2,,3,] , 9) ... OK: got correct error: unexpected character
|
json_tokener_parse_ex(tok, [1,2,,3,] , 9) ... OK: got correct error: unexpected character
|
||||||
json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got correct error: unexpected character
|
json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got correct error: unexpected character
|
||||||
json_tokener_parse_ex(tok, {"a":1,} , 8) ... OK: got correct error: unexpected character
|
json_tokener_parse_ex(tok, {"a":1,} , 8) ... OK: got correct error: unexpected character
|
||||||
End Incremental Tests OK=105 ERROR=0
|
json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&"
|
||||||
|
json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&"
|
||||||
|
json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界"
|
||||||
|
json_tokener_parse_ex(tok, "<22><> , 3) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, <20>界" , 5) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界"
|
||||||
|
json_tokener_parse_ex(tok, "πφ" , 6) ... OK: got object of type [string]: "πφ"
|
||||||
|
json_tokener_parse_ex(tok, "𥑕" , 6) ... OK: got object of type [string]: "𥑕"
|
||||||
|
json_tokener_parse_ex(tok, "<22><><EFBFBD><EFBFBD><EFBFBD>" , 7) ... OK: got object of type [string]: "<22><><EFBFBD><EFBFBD><EFBFBD>"
|
||||||
|
json_tokener_parse_ex(tok, "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>" , 8) ... OK: got object of type [string]: "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"
|
||||||
|
json_tokener_parse_ex(tok, "<22><>N" , 5) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, "<22><>N" , 5) ... OK: got object of type [string]: "<22><>N"
|
||||||
|
json_tokener_parse_ex(tok, "<22><><EFBFBD><EFBFBD>" , 6) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, "<22><><EFBFBD><EFBFBD>" , 6) ... OK: got object of type [string]: "<22><><EFBFBD><EFBFBD>"
|
||||||
|
json_tokener_parse_ex(tok, "世" , 7) ... OK: got object of type [string]: "世"
|
||||||
|
json_tokener_parse_ex(tok, <20>"世" , 8) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, [ <20>1] , 5) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, Infinity , 9) ... OK: got object of type [double]: Infinity
|
||||||
|
json_tokener_parse_ex(tok, Inf<6E>nity , 8) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, "\ud855\udc55", 15) ... OK: got object of type [string]: "𥑕"
|
||||||
|
json_tokener_parse_ex(tok, "\ud855<35>udc55", 14) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, "\ud0031<33>" , 10) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, 11<31>11 , 5) ... OK: got correct error: invalid utf-8 string
|
||||||
|
json_tokener_parse_ex(tok, {"1<>":1} , 8) ... OK: got correct error: invalid utf-8 string
|
||||||
|
End Incremental Tests OK=129 ERROR=0
|
||||||
==================================
|
==================================
|
||||||
|
|||||||
Reference in New Issue
Block a user