In the json_tokener_state_number case, explicitly adjust what "number" characters are allowed based on the exact micro-state that we're in, and check for invalid following characters in a different way, to allow a valid json_type_number object to be returned at the top level.

This causes previously failing strings like "123-456" to return a valid json_object with the appropriate value. If you care about the trailing content, call json_tokener_parse_ex() and check the parse end point with json_tokener_get_parse_end().
2026-06-10 13:09:07 +08:00 · 2020-06-29 02:14:26 +00:00
parent 6eac6986c9
commit f23486a321
6 changed files with 92 additions and 50 deletions
--- a/tests/test_parse.c
+++ b/tests/test_parse.c
@@ -141,16 +141,18 @@ static void test_basic_parse()

 	single_basic_parse("12", 0);
 	single_basic_parse("12.3", 0);
-	single_basic_parse("12.3.4", 0); /* non-sensical, returns null */
-	/* was returning (int)2015 before patch, should return null */
-	single_basic_parse("2015-01-15", 0);

-	/* ...but this works.  It's rather inconsistent, and a future major release
-	 * should change the behavior so it either always returns null when extra
-	 * bytes are present (preferred), or always return object created from as much
-	 * as was able to be parsed.
+	/* Even though, when using json_tokener_parse() there's no way to
+	 *  know when there is more data after the parsed object,
+	 *  an object is successfully returned anyway (in some cases)
 	 */
+
+	single_basic_parse("12.3.4", 0);
+	single_basic_parse("2015-01-15", 0);
 	single_basic_parse("12.3xxx", 0);
+	single_basic_parse("12.3{\"a\":123}", 0);
+	single_basic_parse("12.3\n", 0);
+	single_basic_parse("12.3 ", 0);

 	single_basic_parse("{\"FoO\"  :   -12.3E512}", 0);
 	single_basic_parse("{\"FoO\"  :   -12.3e512}", 0);
@@ -368,7 +370,10 @@ struct incremental_step
    {"[0e-]", -1, -1, json_tokener_success, 1},
    {"[0e-]", -1, 4, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT},

-    {"0e+-", 5, 3, json_tokener_error_parse_number, 1},
+	/* You might expect this to fail, but it won't because
+	   it's a valid partial parse; note the char_offset: */
+    {"0e+-", 5, 3, json_tokener_success, 1},
+    {"0e+-", 5, 3, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT},
    {"[0e+-]", -1, 4, json_tokener_error_parse_number, 1},

    /* Similar tests for other kinds of objects: */
@@ -447,11 +452,22 @@ struct incremental_step
    {"{\"a\":1}{\"b\":2}", 15, 7, json_tokener_success, 0},
    {&"{\"a\":1}{\"b\":2}"[7], 8, 7, json_tokener_success, 1},

-    /* Some bad formatting. Check we get the correct error status
-     * XXX this means we can't have two numbers in the incremental parse
-     * XXX stream with the second one being a negative number!
-     */
-    {"2015-01-15", 10, 4, json_tokener_error_parse_number, 1},
+	/*
+	 * Though this may seem invalid at first glance, it
+	 * parses as three separate numbers, 2015, -1 and -15
+	 * Of course, simply pasting together a stream of arbitrary
+	 * positive numbers won't work, since there'll be no way to
+     * tell where in e.g. "2015015" the next number starts, so
+	 * a reliably parsable stream must not include json_type_int
+	 * or json_type_double objects without some other delimiter,
+	 * e.g. whitespace.
+	 */
+    {&"2015-01-15"[0], 11, 4, json_tokener_success, 1},
+    {&"2015-01-15"[4], 7, 3, json_tokener_success, 1},
+    {&"2015-01-15"[7], 4, 3, json_tokener_success, 1},
+    {&"2015 01 15"[0], 11, 5, json_tokener_success, 1},
+    {&"2015 01 15"[4], 7, 4, json_tokener_success, 1},
+    {&"2015 01 15"[7], 4, 3, json_tokener_success, 1},

    /* Strings have a well defined end point, so we can stop at the quote */
    {"\"blue\"", -1, -1, json_tokener_success, 0},
--- a/tests/test_parse.expected
+++ b/tests/test_parse.expected
@@ -40,9 +40,13 @@ new_obj.to_string(nAn)=NaN
 new_obj.to_string(iNfinity)=Infinity
 new_obj.to_string(12)=12
 new_obj.to_string(12.3)=12.3
-new_obj.to_string(12.3.4)=null
-new_obj.to_string(2015-01-15)=null
+new_obj.to_string(12.3.4)=12.3
+new_obj.to_string(2015-01-15)=2015
 new_obj.to_string(12.3xxx)=12.3
+new_obj.to_string(12.3{"a":123})=12.3
+new_obj.to_string(12.3
+)=12.3
+new_obj.to_string(12.3 )=12.3
 new_obj.to_string({"FoO"  :   -12.3E512})={ "FoO": -12.3E512 }
 new_obj.to_string({"FoO"  :   -12.3e512})={ "FoO": -12.3e512 }
 new_obj.to_string({"FoO"  :   -12.3E51.2})=null
@@ -162,6 +166,7 @@ json_tokener_parse_ex(tok, 0e-         ,   4) ... OK: got object of type [double
 json_tokener_parse_ex(tok, 0e-         ,   4) ... OK: got correct error: unexpected end of data
 json_tokener_parse_ex(tok, [0e-]       ,   5) ... OK: got object of type [array]: [ 0 ]
 json_tokener_parse_ex(tok, [0e-]       ,   5) ... OK: got correct error: number expected
+json_tokener_parse_ex(tok, 0e+-        ,   5) ... OK: got object of type [double]: 0
 json_tokener_parse_ex(tok, 0e+-        ,   5) ... OK: got correct error: number expected
 json_tokener_parse_ex(tok, [0e+-]      ,   6) ... OK: got correct error: number expected
 json_tokener_parse_ex(tok, false       ,   5) ... OK: got correct error: continue
@@ -215,7 +220,12 @@ json_tokener_parse_ex(tok, nullx       ,   6) ... OK: got object of type [null]:
 json_tokener_parse_ex(tok, x           ,   2) ... OK: got correct error: unexpected character
 json_tokener_parse_ex(tok, {"a":1}{"b":2},  15) ... OK: got object of type [object]: { "a": 1 }
 json_tokener_parse_ex(tok, {"b":2}     ,   8) ... OK: got object of type [object]: { "b": 2 }
-json_tokener_parse_ex(tok, 2015-01-15  ,  10) ... OK: got correct error: number expected
+json_tokener_parse_ex(tok, 2015-01-15  ,  11) ... OK: got object of type [int]: 2015
+json_tokener_parse_ex(tok, -01-15      ,   7) ... OK: got object of type [int]: -1
+json_tokener_parse_ex(tok, -15         ,   4) ... OK: got object of type [int]: -15
+json_tokener_parse_ex(tok, 2015 01 15  ,  11) ... OK: got object of type [int]: 2015
+json_tokener_parse_ex(tok,  01 15      ,   7) ... OK: got object of type [int]: 1
+json_tokener_parse_ex(tok,  15         ,   4) ... OK: got object of type [int]: 15
 json_tokener_parse_ex(tok, "blue"      ,   6) ... OK: got object of type [string]: "blue"
 json_tokener_parse_ex(tok, "\""        ,   4) ... OK: got object of type [string]: "\""
 json_tokener_parse_ex(tok, "\\"        ,   4) ... OK: got object of type [string]: "\\"
@@ -265,5 +275,5 @@ json_tokener_parse_ex(tok, "\ud855
 json_tokener_parse_ex(tok, "\ud0031<33>"  ,  10) ... OK: got correct error: invalid utf-8 string
 json_tokener_parse_ex(tok, 11<31>11       ,   5) ... OK: got correct error: invalid utf-8 string
 json_tokener_parse_ex(tok, {"1<>":1}    ,   8) ... OK: got correct error: invalid utf-8 string
-End Incremental Tests OK=179 ERROR=0
+End Incremental Tests OK=185 ERROR=0
 ==================================