[ruby/json] Limit the size of ParserError exception messages

Fix: https://github.com/ruby/json/issues/534

Only include up to 32 bytes of the unparseable source.

https://github.com/ruby/json/commit/f44995cfb6
This commit is contained in:
Jean Boussier 2024-10-18 14:04:52 +02:00 committed by Hiroshi SHIBATA
parent 7dfc1f3d66
commit 9045258c88
3 changed files with 105 additions and 75 deletions

View File

@ -69,12 +69,22 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
return len; return len;
} }
#define PARSE_ERROR_FRAGMENT_LEN 32
#ifdef RBIMPL_ATTR_NORETURN #ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN() RBIMPL_ATTR_NORETURN()
#endif #endif
/*
 * Raise JSON::ParserError (UTF-8 encoded message) built from `format`, whose
 * single argument is the source text beginning at `start`. At most
 * PARSE_ERROR_FRAGMENT_LEN bytes of that text are passed on, so the message
 * stays small even when the unparseable input is very large.
 */
static void raise_parse_error(const char *format, const char *start) static void raise_parse_error(const char *format, const char *start)
{ {
rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, start); size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
const char *ptr = start;
/* Function scope on purpose: `ptr` may still point here when rb_enc_raise()
 * runs, so the array must outlive the `if` block below. */
char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
if (len == PARSE_ERROR_FRAGMENT_LEN) {
/* strnlen() reached the cap: hand over a truncated, NUL-terminated copy. */
MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
ptr = buffer;
}
rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
} }
static VALUE mJSON, mExt, cParser, eNestingError; static VALUE mJSON, mExt, cParser, eNestingError;
@ -91,11 +101,11 @@ static int utf8_encindex;
#line 117 "parser.rl" #line 127 "parser.rl"
#line 99 "parser.c" #line 109 "parser.c"
enum {JSON_object_start = 1}; enum {JSON_object_start = 1};
enum {JSON_object_first_final = 27}; enum {JSON_object_first_final = 27};
enum {JSON_object_error = 0}; enum {JSON_object_error = 0};
@ -103,7 +113,7 @@ enum {JSON_object_error = 0};
enum {JSON_object_en_main = 1}; enum {JSON_object_en_main = 1};
#line 159 "parser.rl" #line 169 "parser.rl"
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@ -119,14 +129,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
*result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
#line 123 "parser.c" #line 133 "parser.c"
{ {
cs = JSON_object_start; cs = JSON_object_start;
} }
#line 174 "parser.rl" #line 184 "parser.rl"
#line 130 "parser.c" #line 140 "parser.c"
{ {
if ( p == pe ) if ( p == pe )
goto _test_eof; goto _test_eof;
@ -154,7 +164,7 @@ case 2:
goto st2; goto st2;
goto st0; goto st0;
tr2: tr2:
#line 141 "parser.rl" #line 151 "parser.rl"
{ {
char *np; char *np;
json->parsing_name = 1; json->parsing_name = 1;
@ -167,7 +177,7 @@ st3:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof3; goto _test_eof3;
case 3: case 3:
#line 171 "parser.c" #line 181 "parser.c"
switch( (*p) ) { switch( (*p) ) {
case 13: goto st3; case 13: goto st3;
case 32: goto st3; case 32: goto st3;
@ -234,7 +244,7 @@ case 8:
goto st8; goto st8;
goto st0; goto st0;
tr11: tr11:
#line 125 "parser.rl" #line 135 "parser.rl"
{ {
VALUE v = Qnil; VALUE v = Qnil;
char *np = JSON_parse_value(json, p, pe, &v, current_nesting); char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@ -255,7 +265,7 @@ st9:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof9; goto _test_eof9;
case 9: case 9:
#line 259 "parser.c" #line 269 "parser.c"
switch( (*p) ) { switch( (*p) ) {
case 13: goto st9; case 13: goto st9;
case 32: goto st9; case 32: goto st9;
@ -344,14 +354,14 @@ case 18:
goto st9; goto st9;
goto st18; goto st18;
tr4: tr4:
#line 149 "parser.rl" #line 159 "parser.rl"
{ p--; {p++; cs = 27; goto _out;} } { p--; {p++; cs = 27; goto _out;} }
goto st27; goto st27;
st27: st27:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof27; goto _test_eof27;
case 27: case 27:
#line 355 "parser.c" #line 365 "parser.c"
goto st0; goto st0;
st19: st19:
if ( ++p == pe ) if ( ++p == pe )
@ -449,7 +459,7 @@ case 26:
_out: {} _out: {}
} }
#line 175 "parser.rl" #line 185 "parser.rl"
if (cs >= JSON_object_first_final) { if (cs >= JSON_object_first_final) {
if (json->create_additions) { if (json->create_additions) {
@ -474,7 +484,7 @@ case 26:
#line 478 "parser.c" #line 488 "parser.c"
enum {JSON_value_start = 1}; enum {JSON_value_start = 1};
enum {JSON_value_first_final = 29}; enum {JSON_value_first_final = 29};
enum {JSON_value_error = 0}; enum {JSON_value_error = 0};
@ -482,7 +492,7 @@ enum {JSON_value_error = 0};
enum {JSON_value_en_main = 1}; enum {JSON_value_en_main = 1};
#line 275 "parser.rl" #line 285 "parser.rl"
static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@ -490,14 +500,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
int cs = EVIL; int cs = EVIL;
#line 494 "parser.c" #line 504 "parser.c"
{ {
cs = JSON_value_start; cs = JSON_value_start;
} }
#line 282 "parser.rl" #line 292 "parser.rl"
#line 501 "parser.c" #line 511 "parser.c"
{ {
if ( p == pe ) if ( p == pe )
goto _test_eof; goto _test_eof;
@ -531,14 +541,14 @@ st0:
cs = 0; cs = 0;
goto _out; goto _out;
tr2: tr2:
#line 227 "parser.rl" #line 237 "parser.rl"
{ {
char *np = JSON_parse_string(json, p, pe, result); char *np = JSON_parse_string(json, p, pe, result);
if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
} }
goto st29; goto st29;
tr3: tr3:
#line 232 "parser.rl" #line 242 "parser.rl"
{ {
char *np; char *np;
if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
@ -558,7 +568,7 @@ tr3:
} }
goto st29; goto st29;
tr7: tr7:
#line 250 "parser.rl" #line 260 "parser.rl"
{ {
char *np; char *np;
np = JSON_parse_array(json, p, pe, result, current_nesting + 1); np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
@ -566,7 +576,7 @@ tr7:
} }
goto st29; goto st29;
tr11: tr11:
#line 256 "parser.rl" #line 266 "parser.rl"
{ {
char *np; char *np;
np = JSON_parse_object(json, p, pe, result, current_nesting + 1); np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
@ -574,7 +584,7 @@ tr11:
} }
goto st29; goto st29;
tr25: tr25:
#line 220 "parser.rl" #line 230 "parser.rl"
{ {
if (json->allow_nan) { if (json->allow_nan) {
*result = CInfinity; *result = CInfinity;
@ -584,7 +594,7 @@ tr25:
} }
goto st29; goto st29;
tr27: tr27:
#line 213 "parser.rl" #line 223 "parser.rl"
{ {
if (json->allow_nan) { if (json->allow_nan) {
*result = CNaN; *result = CNaN;
@ -594,19 +604,19 @@ tr27:
} }
goto st29; goto st29;
tr31: tr31:
#line 207 "parser.rl" #line 217 "parser.rl"
{ {
*result = Qfalse; *result = Qfalse;
} }
goto st29; goto st29;
tr34: tr34:
#line 204 "parser.rl" #line 214 "parser.rl"
{ {
*result = Qnil; *result = Qnil;
} }
goto st29; goto st29;
tr37: tr37:
#line 210 "parser.rl" #line 220 "parser.rl"
{ {
*result = Qtrue; *result = Qtrue;
} }
@ -615,9 +625,9 @@ st29:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof29; goto _test_eof29;
case 29: case 29:
#line 262 "parser.rl" #line 272 "parser.rl"
{ p--; {p++; cs = 29; goto _out;} } { p--; {p++; cs = 29; goto _out;} }
#line 621 "parser.c" #line 631 "parser.c"
switch( (*p) ) { switch( (*p) ) {
case 13: goto st29; case 13: goto st29;
case 32: goto st29; case 32: goto st29;
@ -858,7 +868,7 @@ case 28:
_out: {} _out: {}
} }
#line 283 "parser.rl" #line 293 "parser.rl"
if (json->freeze) { if (json->freeze) {
OBJ_FREEZE(*result); OBJ_FREEZE(*result);
@ -872,7 +882,7 @@ case 28:
} }
#line 876 "parser.c" #line 886 "parser.c"
enum {JSON_integer_start = 1}; enum {JSON_integer_start = 1};
enum {JSON_integer_first_final = 3}; enum {JSON_integer_first_final = 3};
enum {JSON_integer_error = 0}; enum {JSON_integer_error = 0};
@ -880,7 +890,7 @@ enum {JSON_integer_error = 0};
enum {JSON_integer_en_main = 1}; enum {JSON_integer_en_main = 1};
#line 303 "parser.rl" #line 313 "parser.rl"
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
@ -888,15 +898,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
int cs = EVIL; int cs = EVIL;
#line 892 "parser.c" #line 902 "parser.c"
{ {
cs = JSON_integer_start; cs = JSON_integer_start;
} }
#line 310 "parser.rl" #line 320 "parser.rl"
json->memo = p; json->memo = p;
#line 900 "parser.c" #line 910 "parser.c"
{ {
if ( p == pe ) if ( p == pe )
goto _test_eof; goto _test_eof;
@ -930,14 +940,14 @@ case 3:
goto st0; goto st0;
goto tr4; goto tr4;
tr4: tr4:
#line 300 "parser.rl" #line 310 "parser.rl"
{ p--; {p++; cs = 4; goto _out;} } { p--; {p++; cs = 4; goto _out;} }
goto st4; goto st4;
st4: st4:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof4; goto _test_eof4;
case 4: case 4:
#line 941 "parser.c" #line 951 "parser.c"
goto st0; goto st0;
st5: st5:
if ( ++p == pe ) if ( ++p == pe )
@ -956,7 +966,7 @@ case 5:
_out: {} _out: {}
} }
#line 312 "parser.rl" #line 322 "parser.rl"
if (cs >= JSON_integer_first_final) { if (cs >= JSON_integer_first_final) {
long len = p - json->memo; long len = p - json->memo;
@ -971,7 +981,7 @@ case 5:
} }
#line 975 "parser.c" #line 985 "parser.c"
enum {JSON_float_start = 1}; enum {JSON_float_start = 1};
enum {JSON_float_first_final = 8}; enum {JSON_float_first_final = 8};
enum {JSON_float_error = 0}; enum {JSON_float_error = 0};
@ -979,7 +989,7 @@ enum {JSON_float_error = 0};
enum {JSON_float_en_main = 1}; enum {JSON_float_en_main = 1};
#line 337 "parser.rl" #line 347 "parser.rl"
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
@ -987,15 +997,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
int cs = EVIL; int cs = EVIL;
#line 991 "parser.c" #line 1001 "parser.c"
{ {
cs = JSON_float_start; cs = JSON_float_start;
} }
#line 344 "parser.rl" #line 354 "parser.rl"
json->memo = p; json->memo = p;
#line 999 "parser.c" #line 1009 "parser.c"
{ {
if ( p == pe ) if ( p == pe )
goto _test_eof; goto _test_eof;
@ -1053,14 +1063,14 @@ case 8:
goto st0; goto st0;
goto tr9; goto tr9;
tr9: tr9:
#line 331 "parser.rl" #line 341 "parser.rl"
{ p--; {p++; cs = 9; goto _out;} } { p--; {p++; cs = 9; goto _out;} }
goto st9; goto st9;
st9: st9:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof9; goto _test_eof9;
case 9: case 9:
#line 1064 "parser.c" #line 1074 "parser.c"
goto st0; goto st0;
st5: st5:
if ( ++p == pe ) if ( ++p == pe )
@ -1121,7 +1131,7 @@ case 7:
_out: {} _out: {}
} }
#line 346 "parser.rl" #line 356 "parser.rl"
if (cs >= JSON_float_first_final) { if (cs >= JSON_float_first_final) {
VALUE mod = Qnil; VALUE mod = Qnil;
@ -1172,7 +1182,7 @@ case 7:
#line 1176 "parser.c" #line 1186 "parser.c"
enum {JSON_array_start = 1}; enum {JSON_array_start = 1};
enum {JSON_array_first_final = 17}; enum {JSON_array_first_final = 17};
enum {JSON_array_error = 0}; enum {JSON_array_error = 0};
@ -1180,7 +1190,7 @@ enum {JSON_array_error = 0};
enum {JSON_array_en_main = 1}; enum {JSON_array_en_main = 1};
#line 424 "parser.rl" #line 434 "parser.rl"
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@ -1194,14 +1204,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
*result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
#line 1198 "parser.c" #line 1208 "parser.c"
{ {
cs = JSON_array_start; cs = JSON_array_start;
} }
#line 437 "parser.rl" #line 447 "parser.rl"
#line 1205 "parser.c" #line 1215 "parser.c"
{ {
if ( p == pe ) if ( p == pe )
goto _test_eof; goto _test_eof;
@ -1240,7 +1250,7 @@ case 2:
goto st2; goto st2;
goto st0; goto st0;
tr2: tr2:
#line 401 "parser.rl" #line 411 "parser.rl"
{ {
VALUE v = Qnil; VALUE v = Qnil;
char *np = JSON_parse_value(json, p, pe, &v, current_nesting); char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@ -1260,7 +1270,7 @@ st3:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof3; goto _test_eof3;
case 3: case 3:
#line 1264 "parser.c" #line 1274 "parser.c"
switch( (*p) ) { switch( (*p) ) {
case 13: goto st3; case 13: goto st3;
case 32: goto st3; case 32: goto st3;
@ -1360,14 +1370,14 @@ case 12:
goto st3; goto st3;
goto st12; goto st12;
tr4: tr4:
#line 416 "parser.rl" #line 426 "parser.rl"
{ p--; {p++; cs = 17; goto _out;} } { p--; {p++; cs = 17; goto _out;} }
goto st17; goto st17;
st17: st17:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof17; goto _test_eof17;
case 17: case 17:
#line 1371 "parser.c" #line 1381 "parser.c"
goto st0; goto st0;
st13: st13:
if ( ++p == pe ) if ( ++p == pe )
@ -1423,7 +1433,7 @@ case 16:
_out: {} _out: {}
} }
#line 438 "parser.rl" #line 448 "parser.rl"
if(cs >= JSON_array_first_final) { if(cs >= JSON_array_first_final) {
return p + 1; return p + 1;
@ -1584,7 +1594,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
} }
#line 1588 "parser.c" #line 1598 "parser.c"
enum {JSON_string_start = 1}; enum {JSON_string_start = 1};
enum {JSON_string_first_final = 8}; enum {JSON_string_first_final = 8};
enum {JSON_string_error = 0}; enum {JSON_string_error = 0};
@ -1592,7 +1602,7 @@ enum {JSON_string_error = 0};
enum {JSON_string_en_main = 1}; enum {JSON_string_en_main = 1};
#line 616 "parser.rl" #line 626 "parser.rl"
static int static int
@ -1613,15 +1623,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
VALUE match_string; VALUE match_string;
#line 1617 "parser.c" #line 1627 "parser.c"
{ {
cs = JSON_string_start; cs = JSON_string_start;
} }
#line 636 "parser.rl" #line 646 "parser.rl"
json->memo = p; json->memo = p;
#line 1625 "parser.c" #line 1635 "parser.c"
{ {
if ( p == pe ) if ( p == pe )
goto _test_eof; goto _test_eof;
@ -1646,7 +1656,7 @@ case 2:
goto st0; goto st0;
goto st2; goto st2;
tr2: tr2:
#line 603 "parser.rl" #line 613 "parser.rl"
{ {
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) { if (NIL_P(*result)) {
@ -1656,14 +1666,14 @@ tr2:
{p = (( p + 1))-1;} {p = (( p + 1))-1;}
} }
} }
#line 613 "parser.rl" #line 623 "parser.rl"
{ p--; {p++; cs = 8; goto _out;} } { p--; {p++; cs = 8; goto _out;} }
goto st8; goto st8;
st8: st8:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof8; goto _test_eof8;
case 8: case 8:
#line 1667 "parser.c" #line 1677 "parser.c"
goto st0; goto st0;
st3: st3:
if ( ++p == pe ) if ( ++p == pe )
@ -1739,7 +1749,7 @@ case 7:
_out: {} _out: {}
} }
#line 638 "parser.rl" #line 648 "parser.rl"
if (json->create_additions && RTEST(match_string = json->match_string)) { if (json->create_additions && RTEST(match_string = json->match_string)) {
VALUE klass; VALUE klass;
@ -1926,7 +1936,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
} }
#line 1930 "parser.c" #line 1940 "parser.c"
enum {JSON_start = 1}; enum {JSON_start = 1};
enum {JSON_first_final = 10}; enum {JSON_first_final = 10};
enum {JSON_error = 0}; enum {JSON_error = 0};
@ -1934,7 +1944,7 @@ enum {JSON_error = 0};
enum {JSON_en_main = 1}; enum {JSON_en_main = 1};
#line 838 "parser.rl" #line 848 "parser.rl"
/* /*
@ -1952,16 +1962,16 @@ static VALUE cParser_parse(VALUE self)
GET_PARSER; GET_PARSER;
#line 1956 "parser.c" #line 1966 "parser.c"
{ {
cs = JSON_start; cs = JSON_start;
} }
#line 855 "parser.rl" #line 865 "parser.rl"
p = json->source; p = json->source;
pe = p + json->len; pe = p + json->len;
#line 1965 "parser.c" #line 1975 "parser.c"
{ {
if ( p == pe ) if ( p == pe )
goto _test_eof; goto _test_eof;
@ -1995,7 +2005,7 @@ st0:
cs = 0; cs = 0;
goto _out; goto _out;
tr2: tr2:
#line 830 "parser.rl" #line 840 "parser.rl"
{ {
char *np = JSON_parse_value(json, p, pe, &result, 0); char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@ -2005,7 +2015,7 @@ st10:
if ( ++p == pe ) if ( ++p == pe )
goto _test_eof10; goto _test_eof10;
case 10: case 10:
#line 2009 "parser.c" #line 2019 "parser.c"
switch( (*p) ) { switch( (*p) ) {
case 13: goto st10; case 13: goto st10;
case 32: goto st10; case 32: goto st10;
@ -2094,7 +2104,7 @@ case 9:
_out: {} _out: {}
} }
#line 858 "parser.rl" #line 868 "parser.rl"
if (cs >= JSON_first_final && p == pe) { if (cs >= JSON_first_final && p == pe) {
return result; return result;

View File

@ -67,12 +67,22 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
return len; return len;
} }
#define PARSE_ERROR_FRAGMENT_LEN 32
#ifdef RBIMPL_ATTR_NORETURN #ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN() RBIMPL_ATTR_NORETURN()
#endif #endif
/*
 * Raise JSON::ParserError (UTF-8 encoded message) built from `format`, whose
 * single argument is the source text beginning at `start`. At most
 * PARSE_ERROR_FRAGMENT_LEN bytes of that text are passed on, so the message
 * stays small even when the unparseable input is very large.
 */
static void raise_parse_error(const char *format, const char *start) static void raise_parse_error(const char *format, const char *start)
{ {
rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, start); size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
const char *ptr = start;
/* Function scope on purpose: `ptr` may still point here when rb_enc_raise()
 * runs, so the array must outlive the `if` block below. */
char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
if (len == PARSE_ERROR_FRAGMENT_LEN) {
/* strnlen() reached the cap: hand over a truncated, NUL-terminated copy. */
MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
ptr = buffer;
}
rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
} }
static VALUE mJSON, mExt, cParser, eNestingError; static VALUE mJSON, mExt, cParser, eNestingError;

View File

@ -506,6 +506,16 @@ EOT
) )
end end
def test_parse_error_message_length
  # Backends word their parse errors differently, so the exact text cannot be
  # pinned down. What can be checked is that any fragment of the source echoed
  # back in the message is kept to a reasonable size.
  oversized_document = '{"foo": ' + ('A' * 500) + '}'
  error = assert_raise(JSON::ParserError) { JSON.parse(oversized_document) }
  assert_operator 60, :>, error.message.bytesize
end
private private
def string_deduplication_available? def string_deduplication_available?