Merge pull request #893 from sffc/supplemental-code-point-bug

Fix bug where a decoded supplementary code point whose low 16 bits fall in the high-surrogate range (e.g. U+1DA87 from "\uD836\uDE87") was misclassified as a high surrogate
This commit is contained in:
Eric Hawicz
2025-08-07 09:46:47 -04:00
committed by GitHub
3 changed files with 16 additions and 3 deletions

View File

@@ -145,8 +145,8 @@ enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
}
/* Stuff for decoding unicode sequences */
#define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00)
#define IS_HIGH_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xDC00)
#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};

View File

@@ -113,6 +113,9 @@ static void test_basic_parse(void)
single_basic_parse("\"\\udd27\"", 0);
// Test with a "short" high surrogate
single_basic_parse("[9,'\\uDAD", 0);
single_basic_parse("\"[9,'\\uDAD\"", 0);
// Test with a supplemental character that looks like a high surrogate
single_basic_parse("\"\\uD836\\uDE87\"", 0);
single_basic_parse("null", 0);
single_basic_parse("NaN", 0);
single_basic_parse("-NaN", 0); /* non-sensical, returns null */
@@ -332,6 +335,11 @@ struct incremental_step
{"{ \"foo", -1, -1, json_tokener_continue, 1, 0},
{": \"bar\"}", -1, 0, json_tokener_error_parse_unexpected, 1, 0},
/* Check a supplemental code point that looks like a high surrogate */
{"\"\\uD836", -1, -1, json_tokener_continue, 0, 0},
{"\\uDE87", -1, -1, json_tokener_continue, 0, 0},
{"\"", -1, -1, json_tokener_success, 1, 0},
/* Check incremental parsing with trailing characters */
{"{ \"foo", -1, -1, json_tokener_continue, 0, 0},
{"\": {\"bar", -1, -1, json_tokener_continue, 0, 0},

View File

@@ -13,6 +13,8 @@ new_obj.to_string("\ud840\u4e16")="�世"
new_obj.to_string("\ud840")="�"
new_obj.to_string("\udd27")="�"
new_obj.to_string([9,'\uDAD)=null
new_obj.to_string("[9,'\uDAD")=null
new_obj.to_string("\uD836\uDE87")="𝪇"
new_obj.to_string(null)=null
new_obj.to_string(NaN)=NaN
new_obj.to_string(-NaN)=null
@@ -138,6 +140,9 @@ json_tokener_parse_ex(tok, "ä" , 4) ... OK: got object of type [string
json_tokener_parse_ex(tok, "ä" , 4) ... OK: got object of type [string]: "ä"
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
json_tokener_parse_ex(tok, : "bar"} , 8) ... OK: got correct error: unexpected character
json_tokener_parse_ex(tok, "\uD836 , 7) ... OK: got correct error: continue
json_tokener_parse_ex(tok, \uDE87 , 6) ... OK: got correct error: continue
json_tokener_parse_ex(tok, " , 1) ... OK: got object of type [string]: "𝪇"
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
json_tokener_parse_ex(tok, ": {"bar , 8) ... OK: got correct error: continue
json_tokener_parse_ex(tok, ":13}}XXXX , 10) ... OK: got object of type [object]: { "foo": { "bar": 13 } }
@@ -363,5 +368,5 @@ json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence