[ruby/json] Further improve parsing errors

Report EOF when applicable instead of an empty fragment.

Also stop fragment extraction on first whitespace.

https://github.com/ruby/json/commit/cc1daba860
This commit is contained in:
Jean Boussier 2025-05-12 15:41:12 +02:00 committed by Hiroshi SHIBATA
parent 8cc1aa82f1
commit cd7495a1d0
Notes: git 2025-05-13 05:12:36 +00:00
3 changed files with 66 additions and 39 deletions

View File

@ -393,7 +393,7 @@ RBIMPL_ATTR_NORETURN()
#endif
static void raise_parse_error(const char *format, JSON_ParserState *state)
{
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
const char *cursor = state->cursor;
long column = 0;
@ -412,22 +412,34 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
}
}
const char *ptr = state->cursor;
size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
if (len == PARSE_ERROR_FRAGMENT_LEN) {
MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
len--;
const char *ptr = "EOF";
if (state->cursor && state->cursor < state->end) {
ptr = state->cursor;
size_t len = 0;
while (len < PARSE_ERROR_FRAGMENT_LEN) {
char ch = ptr[len];
if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
break;
}
len++;
}
if (buffer[len - 1] >= 0xC0) { // multibyte character start
len--;
}
if (len) {
buffer[0] = '\'';
MEMCPY(buffer + 1, ptr, char, len);
buffer[len] = '\0';
ptr = (const char *)buffer;
while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
len--;
}
if (buffer[len] >= 0xC0) { // multibyte character start
len--;
}
buffer[len + 1] = '\'';
buffer[len + 2] = '\0';
ptr = (const char *)buffer;
}
}
VALUE msg = rb_sprintf(format, ptr);
@ -473,16 +485,16 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
signed char b;
uint32_t result = 0;
b = digit_values[p[0]];
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
result = (result << 4) | (unsigned char)b;
b = digit_values[p[1]];
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
result = (result << 4) | (unsigned char)b;
b = digit_values[p[2]];
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
result = (result << 4) | (unsigned char)b;
b = digit_values[p[3]];
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
result = (result << 4) | (unsigned char)b;
return result;
}
@ -532,11 +544,11 @@ json_eat_comments(JSON_ParserState *state)
break;
}
default:
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
break;
}
} else {
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
}
}
@ -655,7 +667,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
break;
case 'u':
if (pe > stringEnd - 5) {
raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
} else {
uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
pe += 3;
@ -672,7 +684,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
if ((ch & 0xFC00) == 0xD800) {
pe++;
if (pe > stringEnd - 6) {
raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
}
if (pe[0] == '\\' && pe[1] == 'u') {
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
@ -894,7 +906,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
return json_push_value(state, config, Qnil);
}
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
break;
case 't':
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
@ -902,7 +914,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
return json_push_value(state, config, Qtrue);
}
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
break;
case 'f':
// Note: memcmp with a small power of two compile to an integer comparison
@ -911,7 +923,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
return json_push_value(state, config, Qfalse);
}
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
break;
case 'N':
// Note: memcmp with a small power of two compile to an integer comparison
@ -920,7 +932,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
return json_push_value(state, config, CNaN);
}
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
break;
case 'I':
if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
@ -928,7 +940,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
return json_push_value(state, config, CInfinity);
}
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
break;
case '-':
// Note: memcmp with a small power of two compile to an integer comparison
@ -937,7 +949,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
state->cursor += 9;
return json_push_value(state, config, CMinusInfinity);
} else {
raise_parse_error("unexpected token '%s'", state);
raise_parse_error("unexpected token %s", state);
}
}
// Fallthrough
@ -1062,7 +1074,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
}
if (*state->cursor != '"') {
raise_parse_error("expected object key, got '%s'", state);
raise_parse_error("expected object key, got %s", state);
}
json_parse_string(state, config, true);
@ -1097,13 +1109,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
}
if (*state->cursor != '"') {
raise_parse_error("expected object key, got: '%s'", state);
raise_parse_error("expected object key, got: %s", state);
}
json_parse_string(state, config, true);
json_eat_whitespace(state);
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
raise_parse_error("expected ':' after object key, got: '%s'", state);
raise_parse_error("expected ':' after object key, got: %s", state);
}
state->cursor++;
@ -1113,24 +1125,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
}
}
raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
}
break;
}
default:
raise_parse_error("unexpected character: '%s'", state);
raise_parse_error("unexpected character: %s", state);
break;
}
raise_parse_error("unreacheable: '%s'", state);
raise_parse_error("unreacheable: %s", state);
}
/*
 * Check that nothing is left in the input once parsing is done.
 *
 * Calls json_eat_whitespace() first (presumably to skip trailing whitespace;
 * confirm against its definition), then raises a parse error when the cursor
 * has not reached state->end.
 *
 * NOTE(review): this listing looks like a rendered diff, so the two
 * consecutive raise_parse_error() calls below appear to be the before/after
 * versions of a single line rather than two calls meant to run in sequence.
 */
static void json_ensure_eof(JSON_ParserState *state)
{
json_eat_whitespace(state);
if (state->cursor != state->end) {
raise_parse_error("unexpected token at end of stream '%s'", state);
raise_parse_error("unexpected token at end of stream %s", state);
}
}

View File

@ -14,20 +14,35 @@ class JSONExtParserTest < Test::Unit::TestCase
end
# Checks that invalid documents raise ParserError and pins the exact
# exception message (reported token plus line/column) for each case.
#
# Every message assertion is skipped when RUBY_PLATFORM matches /java/;
# presumably the Java implementation words its errors differently (confirm).
#
# NOTE(review): this listing looks like a rendered diff, so consecutive
# `ex = assert_raise(...)` lines appear to be before/after versions of the
# same statement.
def test_error_messages
ex = assert_raise(ParserError) { parse('Infinity') }
ex = assert_raise(ParserError) { parse('Infinity something') }
unless RUBY_PLATFORM =~ /java/
assert_equal "unexpected token 'Infinity' at line 1 column 1", ex.message
end
ex = assert_raise(ParserError) { parse('-Infinity') }
ex = assert_raise(ParserError) { parse('foo bar') }
unless RUBY_PLATFORM =~ /java/
assert_equal "unexpected token 'foo' at line 1 column 1", ex.message
end
ex = assert_raise(ParserError) { parse('-Infinity something') }
unless RUBY_PLATFORM =~ /java/
assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message
end
ex = assert_raise(ParserError) { parse('NaN') }
ex = assert_raise(ParserError) { parse('NaN something') }
unless RUBY_PLATFORM =~ /java/
assert_equal "unexpected token 'NaN' at line 1 column 1", ex.message
end
# Input with no value at all: the expected message reports end of input.
ex = assert_raise(ParserError) { parse(' ') }
unless RUBY_PLATFORM =~ /java/
assert_equal "unexpected end of input at line 1 column 4", ex.message
end
# Object opened but never given a key: the expected message reports EOF.
ex = assert_raise(ParserError) { parse('{ ') }
unless RUBY_PLATFORM =~ /java/
assert_equal "expected object key, got EOF at line 1 column 5", ex.message
end
end
if GC.respond_to?(:stress=)

View File

@ -646,7 +646,7 @@ class JSONParserTest < Test::Unit::TestCase
JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"bob@example.com"}')
end
if RUBY_ENGINE == "ruby"
assert_equal %(expected ',' or '}' after object value, got: '' at line 1 column 72), error.message
assert_equal %(expected ',' or '}' after object value, got: EOF at line 1 column 72), error.message
end
end