mirror of
https://github.com/json-c/json-c.git
synced 2026-04-09 23:39:06 +08:00
In the json_tokener_state_number case, explicitly adjust what "number" characters are allowed based on the exact micro-state that we're in, and check for invalid following characters in a different way, to allow a valid json_type_number object to be returned at the top level.
This causes previously failing strings like "123-456" to return a valid json_object with the appropriate value. If you care about the trailing content, call json_tokener_parse_ex() and check the parse end point with json_tokener_get_parse_end().
This commit is contained in:
@@ -30,6 +30,9 @@ Other changes
|
|||||||
* Fix incremental parsing of numbers, especially those with exponents, e.g.
|
* Fix incremental parsing of numbers, especially those with exponents, e.g.
|
||||||
so parsing "[0", "e+", "-]" now properly returns an error.
|
so parsing "[0", "e+", "-]" now properly returns an error.
|
||||||
Strict mode now rejects missing exponents ("0e").
|
Strict mode now rejects missing exponents ("0e").
|
||||||
|
* Successfully return number objects at the top level even when they are
|
||||||
|
followed by a "-", "." or "e". This makes parsing things like "123-45"
|
||||||
|
behave consistently with things like "123xyz".
|
||||||
|
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|||||||
@@ -54,7 +54,6 @@
|
|||||||
// Don't define this. It's not thread-safe.
|
// Don't define this. It's not thread-safe.
|
||||||
/* #define REFCOUNT_DEBUG 1 */
|
/* #define REFCOUNT_DEBUG 1 */
|
||||||
|
|
||||||
const char *json_number_chars = "0123456789.+-eE";
|
|
||||||
const char *json_hex_chars = "0123456789abcdefABCDEF";
|
const char *json_hex_chars = "0123456789abcdefABCDEF";
|
||||||
|
|
||||||
static void json_object_generic_delete(struct json_object *jso);
|
static void json_object_generic_delete(struct json_object *jso);
|
||||||
|
|||||||
@@ -98,7 +98,6 @@ struct json_object_string
|
|||||||
|
|
||||||
void _json_c_set_last_err(const char *err_fmt, ...);
|
void _json_c_set_last_err(const char *err_fmt, ...);
|
||||||
|
|
||||||
extern const char *json_number_chars;
|
|
||||||
extern const char *json_hex_chars;
|
extern const char *json_hex_chars;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@@ -193,7 +193,17 @@ struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokene
|
|||||||
return NULL;
|
return NULL;
|
||||||
obj = json_tokener_parse_ex(tok, str, -1);
|
obj = json_tokener_parse_ex(tok, str, -1);
|
||||||
*error = tok->err;
|
*error = tok->err;
|
||||||
if (tok->err != json_tokener_success)
|
if (tok->err != json_tokener_success
|
||||||
|
#if 0
|
||||||
|
/* This would be a more sensible default, and cause parsing
|
||||||
|
* things like "null123" to fail when the caller can't know
|
||||||
|
* where the parsing left off, but starting to fail would
|
||||||
|
* be a notable behaviour change. Save for a 1.0 release.
|
||||||
|
*/
|
||||||
|
|| json_tokener_get_parse_end(tok) != strlen(str)
|
||||||
|
#endif
|
||||||
|
)
|
||||||
|
|
||||||
{
|
{
|
||||||
if (obj != NULL)
|
if (obj != NULL)
|
||||||
json_object_put(obj);
|
json_object_put(obj);
|
||||||
@@ -838,7 +848,8 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
|
|||||||
const char *case_start = str;
|
const char *case_start = str;
|
||||||
int case_len = 0;
|
int case_len = 0;
|
||||||
int is_exponent = 0;
|
int is_exponent = 0;
|
||||||
int negativesign_next_possible_location = 1;
|
int neg_sign_ok = 1;
|
||||||
|
int pos_sign_ok = 0;
|
||||||
if (printbuf_length(tok->pb) > 0)
|
if (printbuf_length(tok->pb) > 0)
|
||||||
{
|
{
|
||||||
/* We don't save all state from the previous incremental parse
|
/* We don't save all state from the previous incremental parse
|
||||||
@@ -852,14 +863,26 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
|
|||||||
char *last_saved_char =
|
char *last_saved_char =
|
||||||
&tok->pb->buf[printbuf_length(tok->pb) - 1];
|
&tok->pb->buf[printbuf_length(tok->pb) - 1];
|
||||||
is_exponent = 1;
|
is_exponent = 1;
|
||||||
|
pos_sign_ok = neg_sign_ok = 1;
|
||||||
/* If the "e" isn't at the end, we can't start with a '-' */
|
/* If the "e" isn't at the end, we can't start with a '-' */
|
||||||
if (e_loc != last_saved_char)
|
if (e_loc != last_saved_char)
|
||||||
negativesign_next_possible_location = -1;
|
{
|
||||||
|
neg_sign_ok = 0;
|
||||||
|
pos_sign_ok = 0;
|
||||||
|
}
|
||||||
// else leave it set to 1, i.e. start of the new input
|
// else leave it set to 1, i.e. start of the new input
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (c && strchr(json_number_chars, c))
|
|
||||||
|
while (c &&
|
||||||
|
((c >= '0' && c <= '9') ||
|
||||||
|
(!is_exponent && (c=='e' || c=='E')) ||
|
||||||
|
(neg_sign_ok && c=='-') ||
|
||||||
|
(pos_sign_ok && c=='+') ||
|
||||||
|
(!tok->is_double && c=='.')
|
||||||
|
))
|
||||||
{
|
{
|
||||||
|
pos_sign_ok = neg_sign_ok = 0;
|
||||||
++case_len;
|
++case_len;
|
||||||
|
|
||||||
/* non-digit characters checks */
|
/* non-digit characters checks */
|
||||||
@@ -871,40 +894,16 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
|
|||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
case '.':
|
case '.':
|
||||||
if (tok->is_double != 0)
|
|
||||||
{
|
|
||||||
/* '.' can only be found once, and out of the exponent part.
|
|
||||||
* Thus, if the input is already flagged as double, it
|
|
||||||
* is invalid.
|
|
||||||
*/
|
|
||||||
tok->err = json_tokener_error_parse_number;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
tok->is_double = 1;
|
tok->is_double = 1;
|
||||||
|
pos_sign_ok = 1;
|
||||||
|
neg_sign_ok = 1;
|
||||||
break;
|
break;
|
||||||
case 'e': /* FALLTHRU */
|
case 'e': /* FALLTHRU */
|
||||||
case 'E':
|
case 'E':
|
||||||
if (is_exponent != 0)
|
|
||||||
{
|
|
||||||
/* only one exponent possible */
|
|
||||||
tok->err = json_tokener_error_parse_number;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
is_exponent = 1;
|
is_exponent = 1;
|
||||||
tok->is_double = 1;
|
tok->is_double = 1;
|
||||||
/* the exponent part can begin with a negative sign */
|
/* the exponent part can begin with a negative sign */
|
||||||
negativesign_next_possible_location = case_len + 1;
|
pos_sign_ok = neg_sign_ok = 1;
|
||||||
break;
|
|
||||||
case '-':
|
|
||||||
if (case_len != negativesign_next_possible_location)
|
|
||||||
{
|
|
||||||
/* If the negative sign is not where expected (ie
|
|
||||||
* start of input or start of exponent part), the
|
|
||||||
* input is invalid.
|
|
||||||
*/
|
|
||||||
tok->err = json_tokener_error_parse_number;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
@@ -915,6 +914,22 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
Now we know c isn't a valid number char, but check whether
|
||||||
|
it might have been intended to be, and return a potentially
|
||||||
|
more understandable error right away.
|
||||||
|
However, if we're at the top-level, use the number as-is
|
||||||
|
because c can be part of a new object to parse on the
|
||||||
|
next call to json_tokener_parse().
|
||||||
|
*/
|
||||||
|
if (tok->depth > 0 &&
|
||||||
|
c != ',' && c != ']' && c != '}' && c != '/' &&
|
||||||
|
c != 'I' && c != 'i' &&
|
||||||
|
!isspace((unsigned char)c))
|
||||||
|
{
|
||||||
|
tok->err = json_tokener_error_parse_number;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
if (case_len > 0)
|
if (case_len > 0)
|
||||||
printbuf_memappend_fast(tok->pb, case_start, case_len);
|
printbuf_memappend_fast(tok->pb, case_start, case_len);
|
||||||
|
|
||||||
|
|||||||
@@ -141,16 +141,18 @@ static void test_basic_parse()
|
|||||||
|
|
||||||
single_basic_parse("12", 0);
|
single_basic_parse("12", 0);
|
||||||
single_basic_parse("12.3", 0);
|
single_basic_parse("12.3", 0);
|
||||||
single_basic_parse("12.3.4", 0); /* non-sensical, returns null */
|
|
||||||
/* was returning (int)2015 before patch, should return null */
|
|
||||||
single_basic_parse("2015-01-15", 0);
|
|
||||||
|
|
||||||
/* ...but this works. It's rather inconsistent, and a future major release
|
/* Even though, when using json_tokener_parse() there's no way to
|
||||||
* should change the behavior so it either always returns null when extra
|
* know when there is more data after the parsed object,
|
||||||
* bytes are present (preferred), or always return object created from as much
|
* an object is successfully returned anyway (in some cases)
|
||||||
* as was able to be parsed.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
single_basic_parse("12.3.4", 0);
|
||||||
|
single_basic_parse("2015-01-15", 0);
|
||||||
single_basic_parse("12.3xxx", 0);
|
single_basic_parse("12.3xxx", 0);
|
||||||
|
single_basic_parse("12.3{\"a\":123}", 0);
|
||||||
|
single_basic_parse("12.3\n", 0);
|
||||||
|
single_basic_parse("12.3 ", 0);
|
||||||
|
|
||||||
single_basic_parse("{\"FoO\" : -12.3E512}", 0);
|
single_basic_parse("{\"FoO\" : -12.3E512}", 0);
|
||||||
single_basic_parse("{\"FoO\" : -12.3e512}", 0);
|
single_basic_parse("{\"FoO\" : -12.3e512}", 0);
|
||||||
@@ -368,7 +370,10 @@ struct incremental_step
|
|||||||
{"[0e-]", -1, -1, json_tokener_success, 1},
|
{"[0e-]", -1, -1, json_tokener_success, 1},
|
||||||
{"[0e-]", -1, 4, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT},
|
{"[0e-]", -1, 4, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT},
|
||||||
|
|
||||||
{"0e+-", 5, 3, json_tokener_error_parse_number, 1},
|
/* You might expect this to fail, but it won't because
|
||||||
|
it's a valid partial parse; note the char_offset: */
|
||||||
|
{"0e+-", 5, 3, json_tokener_success, 1},
|
||||||
|
{"0e+-", 5, 3, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT},
|
||||||
{"[0e+-]", -1, 4, json_tokener_error_parse_number, 1},
|
{"[0e+-]", -1, 4, json_tokener_error_parse_number, 1},
|
||||||
|
|
||||||
/* Similar tests for other kinds of objects: */
|
/* Similar tests for other kinds of objects: */
|
||||||
@@ -447,11 +452,22 @@ struct incremental_step
|
|||||||
{"{\"a\":1}{\"b\":2}", 15, 7, json_tokener_success, 0},
|
{"{\"a\":1}{\"b\":2}", 15, 7, json_tokener_success, 0},
|
||||||
{&"{\"a\":1}{\"b\":2}"[7], 8, 7, json_tokener_success, 1},
|
{&"{\"a\":1}{\"b\":2}"[7], 8, 7, json_tokener_success, 1},
|
||||||
|
|
||||||
/* Some bad formatting. Check we get the correct error status
|
/*
|
||||||
* XXX this means we can't have two numbers in the incremental parse
|
* Though this may seem invalid at first glance, it
|
||||||
* XXX stream with the second one being a negative number!
|
* parses as three separate numbers, 2015, -1 and -15
|
||||||
*/
|
* Of course, simply pasting together a stream of arbitrary
|
||||||
{"2015-01-15", 10, 4, json_tokener_error_parse_number, 1},
|
* positive numbers won't work, since there'll be no way to
|
||||||
|
* tell where in e.g. "2015015" the next number stats, so
|
||||||
|
* a reliably parsable stream must not include json_type_int
|
||||||
|
* or json_type_double objects without some other delimiter.
|
||||||
|
* e.g. whitespace
|
||||||
|
*/
|
||||||
|
{&"2015-01-15"[0], 11, 4, json_tokener_success, 1},
|
||||||
|
{&"2015-01-15"[4], 7, 3, json_tokener_success, 1},
|
||||||
|
{&"2015-01-15"[7], 4, 3, json_tokener_success, 1},
|
||||||
|
{&"2015 01 15"[0], 11, 5, json_tokener_success, 1},
|
||||||
|
{&"2015 01 15"[4], 7, 4, json_tokener_success, 1},
|
||||||
|
{&"2015 01 15"[7], 4, 3, json_tokener_success, 1},
|
||||||
|
|
||||||
/* Strings have a well defined end point, so we can stop at the quote */
|
/* Strings have a well defined end point, so we can stop at the quote */
|
||||||
{"\"blue\"", -1, -1, json_tokener_success, 0},
|
{"\"blue\"", -1, -1, json_tokener_success, 0},
|
||||||
|
|||||||
@@ -40,9 +40,13 @@ new_obj.to_string(nAn)=NaN
|
|||||||
new_obj.to_string(iNfinity)=Infinity
|
new_obj.to_string(iNfinity)=Infinity
|
||||||
new_obj.to_string(12)=12
|
new_obj.to_string(12)=12
|
||||||
new_obj.to_string(12.3)=12.3
|
new_obj.to_string(12.3)=12.3
|
||||||
new_obj.to_string(12.3.4)=null
|
new_obj.to_string(12.3.4)=12.3
|
||||||
new_obj.to_string(2015-01-15)=null
|
new_obj.to_string(2015-01-15)=2015
|
||||||
new_obj.to_string(12.3xxx)=12.3
|
new_obj.to_string(12.3xxx)=12.3
|
||||||
|
new_obj.to_string(12.3{"a":123})=12.3
|
||||||
|
new_obj.to_string(12.3
|
||||||
|
)=12.3
|
||||||
|
new_obj.to_string(12.3 )=12.3
|
||||||
new_obj.to_string({"FoO" : -12.3E512})={ "FoO": -12.3E512 }
|
new_obj.to_string({"FoO" : -12.3E512})={ "FoO": -12.3E512 }
|
||||||
new_obj.to_string({"FoO" : -12.3e512})={ "FoO": -12.3e512 }
|
new_obj.to_string({"FoO" : -12.3e512})={ "FoO": -12.3e512 }
|
||||||
new_obj.to_string({"FoO" : -12.3E51.2})=null
|
new_obj.to_string({"FoO" : -12.3E51.2})=null
|
||||||
@@ -162,6 +166,7 @@ json_tokener_parse_ex(tok, 0e- , 4) ... OK: got object of type [double
|
|||||||
json_tokener_parse_ex(tok, 0e- , 4) ... OK: got correct error: unexpected end of data
|
json_tokener_parse_ex(tok, 0e- , 4) ... OK: got correct error: unexpected end of data
|
||||||
json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got object of type [array]: [ 0 ]
|
json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got object of type [array]: [ 0 ]
|
||||||
json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got correct error: number expected
|
json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got correct error: number expected
|
||||||
|
json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got object of type [double]: 0
|
||||||
json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got correct error: number expected
|
json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got correct error: number expected
|
||||||
json_tokener_parse_ex(tok, [0e+-] , 6) ... OK: got correct error: number expected
|
json_tokener_parse_ex(tok, [0e+-] , 6) ... OK: got correct error: number expected
|
||||||
json_tokener_parse_ex(tok, false , 5) ... OK: got correct error: continue
|
json_tokener_parse_ex(tok, false , 5) ... OK: got correct error: continue
|
||||||
@@ -215,7 +220,12 @@ json_tokener_parse_ex(tok, nullx , 6) ... OK: got object of type [null]:
|
|||||||
json_tokener_parse_ex(tok, x , 2) ... OK: got correct error: unexpected character
|
json_tokener_parse_ex(tok, x , 2) ... OK: got correct error: unexpected character
|
||||||
json_tokener_parse_ex(tok, {"a":1}{"b":2}, 15) ... OK: got object of type [object]: { "a": 1 }
|
json_tokener_parse_ex(tok, {"a":1}{"b":2}, 15) ... OK: got object of type [object]: { "a": 1 }
|
||||||
json_tokener_parse_ex(tok, {"b":2} , 8) ... OK: got object of type [object]: { "b": 2 }
|
json_tokener_parse_ex(tok, {"b":2} , 8) ... OK: got object of type [object]: { "b": 2 }
|
||||||
json_tokener_parse_ex(tok, 2015-01-15 , 10) ... OK: got correct error: number expected
|
json_tokener_parse_ex(tok, 2015-01-15 , 11) ... OK: got object of type [int]: 2015
|
||||||
|
json_tokener_parse_ex(tok, -01-15 , 7) ... OK: got object of type [int]: -1
|
||||||
|
json_tokener_parse_ex(tok, -15 , 4) ... OK: got object of type [int]: -15
|
||||||
|
json_tokener_parse_ex(tok, 2015 01 15 , 11) ... OK: got object of type [int]: 2015
|
||||||
|
json_tokener_parse_ex(tok, 01 15 , 7) ... OK: got object of type [int]: 1
|
||||||
|
json_tokener_parse_ex(tok, 15 , 4) ... OK: got object of type [int]: 15
|
||||||
json_tokener_parse_ex(tok, "blue" , 6) ... OK: got object of type [string]: "blue"
|
json_tokener_parse_ex(tok, "blue" , 6) ... OK: got object of type [string]: "blue"
|
||||||
json_tokener_parse_ex(tok, "\"" , 4) ... OK: got object of type [string]: "\""
|
json_tokener_parse_ex(tok, "\"" , 4) ... OK: got object of type [string]: "\""
|
||||||
json_tokener_parse_ex(tok, "\\" , 4) ... OK: got object of type [string]: "\\"
|
json_tokener_parse_ex(tok, "\\" , 4) ... OK: got object of type [string]: "\\"
|
||||||
@@ -265,5 +275,5 @@ json_tokener_parse_ex(tok, "\ud855
|
|||||||
json_tokener_parse_ex(tok, "\ud0031<33>" , 10) ... OK: got correct error: invalid utf-8 string
|
json_tokener_parse_ex(tok, "\ud0031<33>" , 10) ... OK: got correct error: invalid utf-8 string
|
||||||
json_tokener_parse_ex(tok, 11<31>11 , 5) ... OK: got correct error: invalid utf-8 string
|
json_tokener_parse_ex(tok, 11<31>11 , 5) ... OK: got correct error: invalid utf-8 string
|
||||||
json_tokener_parse_ex(tok, {"1<>":1} , 8) ... OK: got correct error: invalid utf-8 string
|
json_tokener_parse_ex(tok, {"1<>":1} , 8) ... OK: got correct error: invalid utf-8 string
|
||||||
End Incremental Tests OK=179 ERROR=0
|
End Incremental Tests OK=185 ERROR=0
|
||||||
==================================
|
==================================
|
||||||
|
|||||||
Reference in New Issue
Block a user