[ruby/json] Further improve parsing errors
Report `EOF` when the parser has reached the end of the input, instead of printing an empty quoted fragment (`''`). Also stop extracting the error fragment at the first whitespace character. https://github.com/ruby/json/commit/cc1daba860
This commit is contained in:
parent
8cc1aa82f1
commit
cd7495a1d0
Notes:
git
2025-05-13 05:12:36 +00:00
@ -393,7 +393,7 @@ RBIMPL_ATTR_NORETURN()
|
|||||||
#endif
|
#endif
|
||||||
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
||||||
{
|
{
|
||||||
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
||||||
|
|
||||||
const char *cursor = state->cursor;
|
const char *cursor = state->cursor;
|
||||||
long column = 0;
|
long column = 0;
|
||||||
@ -412,22 +412,34 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *ptr = state->cursor;
|
const char *ptr = "EOF";
|
||||||
size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
|
if (state->cursor && state->cursor < state->end) {
|
||||||
|
ptr = state->cursor;
|
||||||
if (len == PARSE_ERROR_FRAGMENT_LEN) {
|
size_t len = 0;
|
||||||
MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
|
while (len < PARSE_ERROR_FRAGMENT_LEN) {
|
||||||
|
char ch = ptr[len];
|
||||||
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
|
if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
|
||||||
len--;
|
break;
|
||||||
|
}
|
||||||
|
len++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buffer[len - 1] >= 0xC0) { // multibyte character start
|
if (len) {
|
||||||
len--;
|
buffer[0] = '\'';
|
||||||
}
|
MEMCPY(buffer + 1, ptr, char, len);
|
||||||
|
|
||||||
buffer[len] = '\0';
|
while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
|
||||||
ptr = (const char *)buffer;
|
len--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buffer[len] >= 0xC0) { // multibyte character start
|
||||||
|
len--;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer[len + 1] = '\'';
|
||||||
|
buffer[len + 2] = '\0';
|
||||||
|
ptr = (const char *)buffer;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
VALUE msg = rb_sprintf(format, ptr);
|
VALUE msg = rb_sprintf(format, ptr);
|
||||||
@ -473,16 +485,16 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|||||||
signed char b;
|
signed char b;
|
||||||
uint32_t result = 0;
|
uint32_t result = 0;
|
||||||
b = digit_values[p[0]];
|
b = digit_values[p[0]];
|
||||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||||
result = (result << 4) | (unsigned char)b;
|
result = (result << 4) | (unsigned char)b;
|
||||||
b = digit_values[p[1]];
|
b = digit_values[p[1]];
|
||||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||||
result = (result << 4) | (unsigned char)b;
|
result = (result << 4) | (unsigned char)b;
|
||||||
b = digit_values[p[2]];
|
b = digit_values[p[2]];
|
||||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||||
result = (result << 4) | (unsigned char)b;
|
result = (result << 4) | (unsigned char)b;
|
||||||
b = digit_values[p[3]];
|
b = digit_values[p[3]];
|
||||||
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
||||||
result = (result << 4) | (unsigned char)b;
|
result = (result << 4) | (unsigned char)b;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -532,11 +544,11 @@ json_eat_comments(JSON_ParserState *state)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -655,7 +667,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|||||||
break;
|
break;
|
||||||
case 'u':
|
case 'u':
|
||||||
if (pe > stringEnd - 5) {
|
if (pe > stringEnd - 5) {
|
||||||
raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
|
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
|
||||||
} else {
|
} else {
|
||||||
uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
|
uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
|
||||||
pe += 3;
|
pe += 3;
|
||||||
@ -672,7 +684,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|||||||
if ((ch & 0xFC00) == 0xD800) {
|
if ((ch & 0xFC00) == 0xD800) {
|
||||||
pe++;
|
pe++;
|
||||||
if (pe > stringEnd - 6) {
|
if (pe > stringEnd - 6) {
|
||||||
raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
|
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
||||||
}
|
}
|
||||||
if (pe[0] == '\\' && pe[1] == 'u') {
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
||||||
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
||||||
@ -894,7 +906,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
return json_push_value(state, config, Qnil);
|
return json_push_value(state, config, Qnil);
|
||||||
}
|
}
|
||||||
|
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
|
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
|
||||||
@ -902,7 +914,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
return json_push_value(state, config, Qtrue);
|
return json_push_value(state, config, Qtrue);
|
||||||
}
|
}
|
||||||
|
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
// Note: memcmp with a small power of two compile to an integer comparison
|
// Note: memcmp with a small power of two compile to an integer comparison
|
||||||
@ -911,7 +923,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
return json_push_value(state, config, Qfalse);
|
return json_push_value(state, config, Qfalse);
|
||||||
}
|
}
|
||||||
|
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
break;
|
break;
|
||||||
case 'N':
|
case 'N':
|
||||||
// Note: memcmp with a small power of two compile to an integer comparison
|
// Note: memcmp with a small power of two compile to an integer comparison
|
||||||
@ -920,7 +932,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
return json_push_value(state, config, CNaN);
|
return json_push_value(state, config, CNaN);
|
||||||
}
|
}
|
||||||
|
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
break;
|
break;
|
||||||
case 'I':
|
case 'I':
|
||||||
if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
||||||
@ -928,7 +940,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
return json_push_value(state, config, CInfinity);
|
return json_push_value(state, config, CInfinity);
|
||||||
}
|
}
|
||||||
|
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
break;
|
break;
|
||||||
case '-':
|
case '-':
|
||||||
// Note: memcmp with a small power of two compile to an integer comparison
|
// Note: memcmp with a small power of two compile to an integer comparison
|
||||||
@ -937,7 +949,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
state->cursor += 9;
|
state->cursor += 9;
|
||||||
return json_push_value(state, config, CMinusInfinity);
|
return json_push_value(state, config, CMinusInfinity);
|
||||||
} else {
|
} else {
|
||||||
raise_parse_error("unexpected token '%s'", state);
|
raise_parse_error("unexpected token %s", state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Fallthrough
|
// Fallthrough
|
||||||
@ -1062,7 +1074,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (*state->cursor != '"') {
|
if (*state->cursor != '"') {
|
||||||
raise_parse_error("expected object key, got '%s'", state);
|
raise_parse_error("expected object key, got %s", state);
|
||||||
}
|
}
|
||||||
json_parse_string(state, config, true);
|
json_parse_string(state, config, true);
|
||||||
|
|
||||||
@ -1097,13 +1109,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (*state->cursor != '"') {
|
if (*state->cursor != '"') {
|
||||||
raise_parse_error("expected object key, got: '%s'", state);
|
raise_parse_error("expected object key, got: %s", state);
|
||||||
}
|
}
|
||||||
json_parse_string(state, config, true);
|
json_parse_string(state, config, true);
|
||||||
|
|
||||||
json_eat_whitespace(state);
|
json_eat_whitespace(state);
|
||||||
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
|
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
|
||||||
raise_parse_error("expected ':' after object key, got: '%s'", state);
|
raise_parse_error("expected ':' after object key, got: %s", state);
|
||||||
}
|
}
|
||||||
state->cursor++;
|
state->cursor++;
|
||||||
|
|
||||||
@ -1113,24 +1125,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
raise_parse_error("unexpected character: '%s'", state);
|
raise_parse_error("unexpected character: %s", state);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
raise_parse_error("unreacheable: '%s'", state);
|
raise_parse_error("unreacheable: %s", state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void json_ensure_eof(JSON_ParserState *state)
|
static void json_ensure_eof(JSON_ParserState *state)
|
||||||
{
|
{
|
||||||
json_eat_whitespace(state);
|
json_eat_whitespace(state);
|
||||||
if (state->cursor != state->end) {
|
if (state->cursor != state->end) {
|
||||||
raise_parse_error("unexpected token at end of stream '%s'", state);
|
raise_parse_error("unexpected token at end of stream %s", state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,20 +14,35 @@ class JSONExtParserTest < Test::Unit::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
def test_error_messages
|
def test_error_messages
|
||||||
ex = assert_raise(ParserError) { parse('Infinity') }
|
ex = assert_raise(ParserError) { parse('Infinity something') }
|
||||||
unless RUBY_PLATFORM =~ /java/
|
unless RUBY_PLATFORM =~ /java/
|
||||||
assert_equal "unexpected token 'Infinity' at line 1 column 1", ex.message
|
assert_equal "unexpected token 'Infinity' at line 1 column 1", ex.message
|
||||||
end
|
end
|
||||||
|
|
||||||
ex = assert_raise(ParserError) { parse('-Infinity') }
|
ex = assert_raise(ParserError) { parse('foo bar') }
|
||||||
|
unless RUBY_PLATFORM =~ /java/
|
||||||
|
assert_equal "unexpected token 'foo' at line 1 column 1", ex.message
|
||||||
|
end
|
||||||
|
|
||||||
|
ex = assert_raise(ParserError) { parse('-Infinity something') }
|
||||||
unless RUBY_PLATFORM =~ /java/
|
unless RUBY_PLATFORM =~ /java/
|
||||||
assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message
|
assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message
|
||||||
end
|
end
|
||||||
|
|
||||||
ex = assert_raise(ParserError) { parse('NaN') }
|
ex = assert_raise(ParserError) { parse('NaN something') }
|
||||||
unless RUBY_PLATFORM =~ /java/
|
unless RUBY_PLATFORM =~ /java/
|
||||||
assert_equal "unexpected token 'NaN' at line 1 column 1", ex.message
|
assert_equal "unexpected token 'NaN' at line 1 column 1", ex.message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
ex = assert_raise(ParserError) { parse(' ') }
|
||||||
|
unless RUBY_PLATFORM =~ /java/
|
||||||
|
assert_equal "unexpected end of input at line 1 column 4", ex.message
|
||||||
|
end
|
||||||
|
|
||||||
|
ex = assert_raise(ParserError) { parse('{ ') }
|
||||||
|
unless RUBY_PLATFORM =~ /java/
|
||||||
|
assert_equal "expected object key, got EOF at line 1 column 5", ex.message
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if GC.respond_to?(:stress=)
|
if GC.respond_to?(:stress=)
|
||||||
|
@ -646,7 +646,7 @@ class JSONParserTest < Test::Unit::TestCase
|
|||||||
JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"bob@example.com"}')
|
JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"bob@example.com"}')
|
||||||
end
|
end
|
||||||
if RUBY_ENGINE == "ruby"
|
if RUBY_ENGINE == "ruby"
|
||||||
assert_equal %(expected ',' or '}' after object value, got: '' at line 1 column 72), error.message
|
assert_equal %(expected ',' or '}' after object value, got: EOF at line 1 column 72), error.message
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user