[ruby/json] Further improve parsing errors
Report EOF when applicable instead of an empty fragment. Also stop fragment extraction at the first whitespace character. https://github.com/ruby/json/commit/cc1daba860
This commit is contained in:
parent
8cc1aa82f1
commit
cd7495a1d0
Notes:
git
2025-05-13 05:12:36 +00:00
@ -393,7 +393,7 @@ RBIMPL_ATTR_NORETURN()
|
||||
#endif
|
||||
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
||||
{
|
||||
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
|
||||
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
||||
|
||||
const char *cursor = state->cursor;
|
||||
long column = 0;
|
||||
@ -412,22 +412,34 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
||||
}
|
||||
}
|
||||
|
||||
const char *ptr = state->cursor;
|
||||
size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
|
||||
|
||||
if (len == PARSE_ERROR_FRAGMENT_LEN) {
|
||||
MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
|
||||
|
||||
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
|
||||
len--;
|
||||
const char *ptr = "EOF";
|
||||
if (state->cursor && state->cursor < state->end) {
|
||||
ptr = state->cursor;
|
||||
size_t len = 0;
|
||||
while (len < PARSE_ERROR_FRAGMENT_LEN) {
|
||||
char ch = ptr[len];
|
||||
if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
|
||||
break;
|
||||
}
|
||||
len++;
|
||||
}
|
||||
|
||||
if (buffer[len - 1] >= 0xC0) { // multibyte character start
|
||||
len--;
|
||||
}
|
||||
if (len) {
|
||||
buffer[0] = '\'';
|
||||
MEMCPY(buffer + 1, ptr, char, len);
|
||||
|
||||
buffer[len] = '\0';
|
||||
ptr = (const char *)buffer;
|
||||
while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
|
||||
len--;
|
||||
}
|
||||
|
||||
if (buffer[len] >= 0xC0) { // multibyte character start
|
||||
len--;
|
||||
}
|
||||
|
||||
buffer[len + 1] = '\'';
|
||||
buffer[len + 2] = '\0';
|
||||
ptr = (const char *)buffer;
|
||||
}
|
||||
}
|
||||
|
||||
VALUE msg = rb_sprintf(format, ptr);
|
||||
@ -473,16 +485,16 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
||||
signed char b;
|
||||
uint32_t result = 0;
|
||||
b = digit_values[p[0]];
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||
result = (result << 4) | (unsigned char)b;
|
||||
b = digit_values[p[1]];
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||
result = (result << 4) | (unsigned char)b;
|
||||
b = digit_values[p[2]];
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||
result = (result << 4) | (unsigned char)b;
|
||||
b = digit_values[p[3]];
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||
result = (result << 4) | (unsigned char)b;
|
||||
return result;
|
||||
}
|
||||
@ -532,11 +544,11 @@ json_eat_comments(JSON_ParserState *state)
|
||||
break;
|
||||
}
|
||||
default:
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
}
|
||||
}
|
||||
|
||||
@ -655,7 +667,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
||||
break;
|
||||
case 'u':
|
||||
if (pe > stringEnd - 5) {
|
||||
raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
|
||||
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
|
||||
} else {
|
||||
uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
|
||||
pe += 3;
|
||||
@ -672,7 +684,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
||||
if ((ch & 0xFC00) == 0xD800) {
|
||||
pe++;
|
||||
if (pe > stringEnd - 6) {
|
||||
raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
|
||||
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
||||
}
|
||||
if (pe[0] == '\\' && pe[1] == 'u') {
|
||||
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
||||
@ -894,7 +906,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
return json_push_value(state, config, Qnil);
|
||||
}
|
||||
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
break;
|
||||
case 't':
|
||||
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
|
||||
@ -902,7 +914,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
return json_push_value(state, config, Qtrue);
|
||||
}
|
||||
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
break;
|
||||
case 'f':
|
||||
// Note: memcmp with a small power of two compile to an integer comparison
|
||||
@ -911,7 +923,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
return json_push_value(state, config, Qfalse);
|
||||
}
|
||||
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
break;
|
||||
case 'N':
|
||||
// Note: memcmp with a small power of two compile to an integer comparison
|
||||
@ -920,7 +932,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
return json_push_value(state, config, CNaN);
|
||||
}
|
||||
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
break;
|
||||
case 'I':
|
||||
if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
||||
@ -928,7 +940,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
return json_push_value(state, config, CInfinity);
|
||||
}
|
||||
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
break;
|
||||
case '-':
|
||||
// Note: memcmp with a small power of two compile to an integer comparison
|
||||
@ -937,7 +949,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
state->cursor += 9;
|
||||
return json_push_value(state, config, CMinusInfinity);
|
||||
} else {
|
||||
raise_parse_error("unexpected token '%s'", state);
|
||||
raise_parse_error("unexpected token %s", state);
|
||||
}
|
||||
}
|
||||
// Fallthrough
|
||||
@ -1062,7 +1074,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
}
|
||||
|
||||
if (*state->cursor != '"') {
|
||||
raise_parse_error("expected object key, got '%s'", state);
|
||||
raise_parse_error("expected object key, got %s", state);
|
||||
}
|
||||
json_parse_string(state, config, true);
|
||||
|
||||
@ -1097,13 +1109,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
}
|
||||
|
||||
if (*state->cursor != '"') {
|
||||
raise_parse_error("expected object key, got: '%s'", state);
|
||||
raise_parse_error("expected object key, got: %s", state);
|
||||
}
|
||||
json_parse_string(state, config, true);
|
||||
|
||||
json_eat_whitespace(state);
|
||||
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
|
||||
raise_parse_error("expected ':' after object key, got: '%s'", state);
|
||||
raise_parse_error("expected ':' after object key, got: %s", state);
|
||||
}
|
||||
state->cursor++;
|
||||
|
||||
@ -1113,24 +1125,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
||||
}
|
||||
}
|
||||
|
||||
raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
|
||||
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
raise_parse_error("unexpected character: '%s'", state);
|
||||
raise_parse_error("unexpected character: %s", state);
|
||||
break;
|
||||
}
|
||||
|
||||
raise_parse_error("unreacheable: '%s'", state);
|
||||
raise_parse_error("unreacheable: %s", state);
|
||||
}
|
||||
|
||||
static void json_ensure_eof(JSON_ParserState *state)
|
||||
{
|
||||
json_eat_whitespace(state);
|
||||
if (state->cursor != state->end) {
|
||||
raise_parse_error("unexpected token at end of stream '%s'", state);
|
||||
raise_parse_error("unexpected token at end of stream %s", state);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,20 +14,35 @@ class JSONExtParserTest < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
def test_error_messages
|
||||
ex = assert_raise(ParserError) { parse('Infinity') }
|
||||
ex = assert_raise(ParserError) { parse('Infinity something') }
|
||||
unless RUBY_PLATFORM =~ /java/
|
||||
assert_equal "unexpected token 'Infinity' at line 1 column 1", ex.message
|
||||
end
|
||||
|
||||
ex = assert_raise(ParserError) { parse('-Infinity') }
|
||||
ex = assert_raise(ParserError) { parse('foo bar') }
|
||||
unless RUBY_PLATFORM =~ /java/
|
||||
assert_equal "unexpected token 'foo' at line 1 column 1", ex.message
|
||||
end
|
||||
|
||||
ex = assert_raise(ParserError) { parse('-Infinity something') }
|
||||
unless RUBY_PLATFORM =~ /java/
|
||||
assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message
|
||||
end
|
||||
|
||||
ex = assert_raise(ParserError) { parse('NaN') }
|
||||
ex = assert_raise(ParserError) { parse('NaN something') }
|
||||
unless RUBY_PLATFORM =~ /java/
|
||||
assert_equal "unexpected token 'NaN' at line 1 column 1", ex.message
|
||||
end
|
||||
|
||||
ex = assert_raise(ParserError) { parse(' ') }
|
||||
unless RUBY_PLATFORM =~ /java/
|
||||
assert_equal "unexpected end of input at line 1 column 4", ex.message
|
||||
end
|
||||
|
||||
ex = assert_raise(ParserError) { parse('{ ') }
|
||||
unless RUBY_PLATFORM =~ /java/
|
||||
assert_equal "expected object key, got EOF at line 1 column 5", ex.message
|
||||
end
|
||||
end
|
||||
|
||||
if GC.respond_to?(:stress=)
|
||||
|
@ -646,7 +646,7 @@ class JSONParserTest < Test::Unit::TestCase
|
||||
JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"bob@example.com"}')
|
||||
end
|
||||
if RUBY_ENGINE == "ruby"
|
||||
assert_equal %(expected ',' or '}' after object value, got: '' at line 1 column 72), error.message
|
||||
assert_equal %(expected ',' or '}' after object value, got: EOF at line 1 column 72), error.message
|
||||
end
|
||||
end
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user