[Bug #20578] ripper: Fix dispatching part at invalid escapes
This commit is contained in:
parent
2677ab1607
commit
2e59cf00cc
46
parse.y
46
parse.y
@ -1512,6 +1512,7 @@ YYLTYPE *rb_parser_set_location(struct parser_params *p, YYLTYPE *yylloc);
|
|||||||
void ruby_show_error_line(struct parser_params *p, VALUE errbuf, const YYLTYPE *yylloc, int lineno, rb_parser_string_t *str);
|
void ruby_show_error_line(struct parser_params *p, VALUE errbuf, const YYLTYPE *yylloc, int lineno, rb_parser_string_t *str);
|
||||||
RUBY_SYMBOL_EXPORT_END
|
RUBY_SYMBOL_EXPORT_END
|
||||||
|
|
||||||
|
static void flush_string_content(struct parser_params *p, rb_encoding *enc, size_t back);
|
||||||
static void error_duplicate_pattern_variable(struct parser_params *p, ID id, const YYLTYPE *loc);
|
static void error_duplicate_pattern_variable(struct parser_params *p, ID id, const YYLTYPE *loc);
|
||||||
static void error_duplicate_pattern_key(struct parser_params *p, ID id, const YYLTYPE *loc);
|
static void error_duplicate_pattern_key(struct parser_params *p, ID id, const YYLTYPE *loc);
|
||||||
static VALUE formal_argument_error(struct parser_params*, ID);
|
static VALUE formal_argument_error(struct parser_params*, ID);
|
||||||
@ -7885,6 +7886,7 @@ tok_hex(struct parser_params *p, size_t *numlen)
|
|||||||
|
|
||||||
c = (int)ruby_scan_hex(p->lex.pcur, 2, numlen);
|
c = (int)ruby_scan_hex(p->lex.pcur, 2, numlen);
|
||||||
if (!*numlen) {
|
if (!*numlen) {
|
||||||
|
flush_string_content(p, p->enc, rb_strlen_lit("\\x"));
|
||||||
yyerror0("invalid hex escape");
|
yyerror0("invalid hex escape");
|
||||||
dispatch_scan_event(p, tSTRING_CONTENT);
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
return 0;
|
return 0;
|
||||||
@ -7927,27 +7929,33 @@ escaped_control_code(int c)
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
|
tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
|
||||||
int regexp_literal, int wide)
|
int regexp_literal, const char *begin)
|
||||||
{
|
{
|
||||||
|
const int wide = !begin;
|
||||||
size_t numlen;
|
size_t numlen;
|
||||||
int codepoint = (int)ruby_scan_hex(p->lex.pcur, wide ? p->lex.pend - p->lex.pcur : 4, &numlen);
|
int codepoint = (int)ruby_scan_hex(p->lex.pcur, wide ? p->lex.pend - p->lex.pcur : 4, &numlen);
|
||||||
|
|
||||||
p->lex.pcur += numlen;
|
p->lex.pcur += numlen;
|
||||||
if (p->lex.strterm == NULL ||
|
if (p->lex.strterm == NULL ||
|
||||||
strterm_is_heredoc(p->lex.strterm) ||
|
strterm_is_heredoc(p->lex.strterm) ||
|
||||||
(p->lex.strterm->u.literal.func != str_regexp)) {
|
(p->lex.strterm->u.literal.func != str_regexp)) {
|
||||||
|
if (!begin) begin = p->lex.pcur;
|
||||||
if (wide ? (numlen == 0 || numlen > 6) : (numlen < 4)) {
|
if (wide ? (numlen == 0 || numlen > 6) : (numlen < 4)) {
|
||||||
literal_flush(p, p->lex.pcur);
|
flush_string_content(p, rb_utf8_encoding(), p->lex.pcur - begin);
|
||||||
yyerror0("invalid Unicode escape");
|
yyerror0("invalid Unicode escape");
|
||||||
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
return wide && numlen > 0;
|
return wide && numlen > 0;
|
||||||
}
|
}
|
||||||
if (codepoint > 0x10ffff) {
|
if (codepoint > 0x10ffff) {
|
||||||
literal_flush(p, p->lex.pcur);
|
flush_string_content(p, rb_utf8_encoding(), p->lex.pcur - begin);
|
||||||
yyerror0("invalid Unicode codepoint (too large)");
|
yyerror0("invalid Unicode codepoint (too large)");
|
||||||
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
return wide;
|
return wide;
|
||||||
}
|
}
|
||||||
if ((codepoint & 0xfffff800) == 0xd800) {
|
if ((codepoint & 0xfffff800) == 0xd800) {
|
||||||
literal_flush(p, p->lex.pcur);
|
flush_string_content(p, rb_utf8_encoding(), p->lex.pcur - begin);
|
||||||
yyerror0("invalid Unicode codepoint");
|
yyerror0("invalid Unicode codepoint");
|
||||||
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
return wide;
|
return wide;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -8035,7 +8043,7 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
|||||||
if (second == multiple_codepoints)
|
if (second == multiple_codepoints)
|
||||||
second = p->lex.pcur;
|
second = p->lex.pcur;
|
||||||
if (regexp_literal) tokadd(p, last);
|
if (regexp_literal) tokadd(p, last);
|
||||||
if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) {
|
if (!tokadd_codepoint(p, encp, regexp_literal, NULL)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
while (ISSPACE(c = peekc(p))) {
|
while (ISSPACE(c = peekc(p))) {
|
||||||
@ -8048,8 +8056,9 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
|||||||
|
|
||||||
if (c != close_brace) {
|
if (c != close_brace) {
|
||||||
unterminated:
|
unterminated:
|
||||||
token_flush(p);
|
flush_string_content(p, rb_utf8_encoding(), 0);
|
||||||
yyerror0("unterminated Unicode escape");
|
yyerror0("unterminated Unicode escape");
|
||||||
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (second && second != multiple_codepoints) {
|
if (second && second != multiple_codepoints) {
|
||||||
@ -8067,7 +8076,7 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else { /* handle \uxxxx form */
|
else { /* handle \uxxxx form */
|
||||||
if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
|
if (!tokadd_codepoint(p, encp, regexp_literal, p->lex.pcur - rb_strlen_lit("\\u"))) {
|
||||||
token_flush(p);
|
token_flush(p);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -8078,7 +8087,7 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
|||||||
#define ESCAPE_META 2
|
#define ESCAPE_META 2
|
||||||
|
|
||||||
static int
|
static int
|
||||||
read_escape(struct parser_params *p, int flags)
|
read_escape(struct parser_params *p, int flags, const char *begin)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
size_t numlen;
|
size_t numlen;
|
||||||
@ -8137,7 +8146,7 @@ read_escape(struct parser_params *p, int flags)
|
|||||||
nextc(p);
|
nextc(p);
|
||||||
goto eof;
|
goto eof;
|
||||||
}
|
}
|
||||||
return read_escape(p, flags|ESCAPE_META) | 0x80;
|
return read_escape(p, flags|ESCAPE_META, begin) | 0x80;
|
||||||
}
|
}
|
||||||
else if (c == -1) goto eof;
|
else if (c == -1) goto eof;
|
||||||
else if (!ISASCII(c)) {
|
else if (!ISASCII(c)) {
|
||||||
@ -8170,7 +8179,7 @@ read_escape(struct parser_params *p, int flags)
|
|||||||
nextc(p);
|
nextc(p);
|
||||||
goto eof;
|
goto eof;
|
||||||
}
|
}
|
||||||
c = read_escape(p, flags|ESCAPE_CONTROL);
|
c = read_escape(p, flags|ESCAPE_CONTROL, begin);
|
||||||
}
|
}
|
||||||
else if (c == '?')
|
else if (c == '?')
|
||||||
return 0177;
|
return 0177;
|
||||||
@ -8205,6 +8214,7 @@ read_escape(struct parser_params *p, int flags)
|
|||||||
|
|
||||||
eof:
|
eof:
|
||||||
case -1:
|
case -1:
|
||||||
|
flush_string_content(p, p->enc, p->lex.pcur - begin);
|
||||||
yyerror0("Invalid escape character syntax");
|
yyerror0("Invalid escape character syntax");
|
||||||
dispatch_scan_event(p, tSTRING_CONTENT);
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
return '\0';
|
return '\0';
|
||||||
@ -8226,6 +8236,7 @@ tokadd_escape(struct parser_params *p)
|
|||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
size_t numlen;
|
size_t numlen;
|
||||||
|
const char *begin = p->lex.pcur;
|
||||||
|
|
||||||
switch (c = nextc(p)) {
|
switch (c = nextc(p)) {
|
||||||
case '\n':
|
case '\n':
|
||||||
@ -8251,6 +8262,7 @@ tokadd_escape(struct parser_params *p)
|
|||||||
|
|
||||||
eof:
|
eof:
|
||||||
case -1:
|
case -1:
|
||||||
|
flush_string_content(p, p->enc, p->lex.pcur - begin);
|
||||||
yyerror0("Invalid escape character syntax");
|
yyerror0("Invalid escape character syntax");
|
||||||
token_flush(p);
|
token_flush(p);
|
||||||
return -1;
|
return -1;
|
||||||
@ -8521,7 +8533,7 @@ tokadd_string(struct parser_params *p,
|
|||||||
case 'C':
|
case 'C':
|
||||||
case 'M': {
|
case 'M': {
|
||||||
pushback(p, c);
|
pushback(p, c);
|
||||||
c = read_escape(p, 0);
|
c = read_escape(p, 0, p->lex.pcur - 1);
|
||||||
|
|
||||||
char *t = tokspace(p, rb_strlen_lit("\\x00"));
|
char *t = tokspace(p, rb_strlen_lit("\\x00"));
|
||||||
*t++ = '\\';
|
*t++ = '\\';
|
||||||
@ -8547,7 +8559,7 @@ tokadd_string(struct parser_params *p,
|
|||||||
else if (func & STR_FUNC_EXPAND) {
|
else if (func & STR_FUNC_EXPAND) {
|
||||||
pushback(p, c);
|
pushback(p, c);
|
||||||
if (func & STR_FUNC_ESCAPE) tokadd(p, '\\');
|
if (func & STR_FUNC_ESCAPE) tokadd(p, '\\');
|
||||||
c = read_escape(p, 0);
|
c = read_escape(p, 0, p->lex.pcur - 1);
|
||||||
}
|
}
|
||||||
else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
|
else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
|
||||||
/* ignore backslashed spaces in %w */
|
/* ignore backslashed spaces in %w */
|
||||||
@ -8597,8 +8609,9 @@ tokadd_string(struct parser_params *p,
|
|||||||
#define NEW_STRTERM(func, term, paren) new_strterm(p, func, term, paren)
|
#define NEW_STRTERM(func, term, paren) new_strterm(p, func, term, paren)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
flush_string_content(struct parser_params *p, rb_encoding *enc)
|
flush_string_content(struct parser_params *p, rb_encoding *enc, size_t back)
|
||||||
{
|
{
|
||||||
|
p->lex.pcur -= back;
|
||||||
if (has_delayed_token(p)) {
|
if (has_delayed_token(p)) {
|
||||||
ptrdiff_t len = p->lex.pcur - p->lex.ptok;
|
ptrdiff_t len = p->lex.pcur - p->lex.ptok;
|
||||||
if (len > 0) {
|
if (len > 0) {
|
||||||
@ -8610,6 +8623,7 @@ flush_string_content(struct parser_params *p, rb_encoding *enc)
|
|||||||
p->lex.ptok = p->lex.pcur;
|
p->lex.ptok = p->lex.pcur;
|
||||||
}
|
}
|
||||||
dispatch_scan_event(p, tSTRING_CONTENT);
|
dispatch_scan_event(p, tSTRING_CONTENT);
|
||||||
|
p->lex.pcur += back;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this can be shared with ripper, since it's independent from struct
|
/* this can be shared with ripper, since it's independent from struct
|
||||||
@ -8777,7 +8791,7 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote)
|
|||||||
tokfix(p);
|
tokfix(p);
|
||||||
lit = STR_NEW3(tok(p), toklen(p), enc, func);
|
lit = STR_NEW3(tok(p), toklen(p), enc, func);
|
||||||
set_yylval_str(lit);
|
set_yylval_str(lit);
|
||||||
flush_string_content(p, enc);
|
flush_string_content(p, enc, 0);
|
||||||
|
|
||||||
return tSTRING_CONTENT;
|
return tSTRING_CONTENT;
|
||||||
}
|
}
|
||||||
@ -9246,7 +9260,7 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here)
|
|||||||
#ifndef RIPPER
|
#ifndef RIPPER
|
||||||
if (bol) nd_set_fl_newline(yylval.node);
|
if (bol) nd_set_fl_newline(yylval.node);
|
||||||
#endif
|
#endif
|
||||||
flush_string_content(p, enc);
|
flush_string_content(p, enc, 0);
|
||||||
return tSTRING_CONTENT;
|
return tSTRING_CONTENT;
|
||||||
}
|
}
|
||||||
tokadd(p, nextc(p));
|
tokadd(p, nextc(p));
|
||||||
@ -10064,7 +10078,7 @@ parse_qmark(struct parser_params *p, int space_seen)
|
|||||||
if (tokadd_mbchar(p, c) == -1) return 0;
|
if (tokadd_mbchar(p, c) == -1) return 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
c = read_escape(p, 0);
|
c = read_escape(p, 0, p->lex.pcur - rb_strlen_lit("?\\"));
|
||||||
tokadd(p, c);
|
tokadd(p, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -390,6 +390,115 @@ world"
|
|||||||
assert_lexer(expected, code)
|
assert_lexer(expected, code)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Checks how Ripper.lex tokenizes string-like literals that contain an
# invalid escape sequence.  In every case the text before the escape, the
# invalid escape itself, and the text after it are expected to be reported
# as separate :on_tstring_content events with the correct [line, column].
def test_invalid_escape_string
  # Invalid "\x" (no hex digits) on the first line of an unterminated string.
  code = "\"hello\\x world"
  expected = [
    [[1, 0], :on_tstring_beg, "\"", state(:EXPR_BEG)],
    [[1, 1], :on_tstring_content, "hello", state(:EXPR_BEG)],
    [[1, 5], :on_tstring_content, "\\x", state(:EXPR_BEG)],
    [[1, 7], :on_tstring_content, " world", state(:EXPR_BEG)],
  ]
  # This case was previously set up but never asserted: the next
  # assignment overwrote `code`/`expected` before they were checked.
  assert_lexer(expected, code)

  # Same invalid "\x", but after a newline inside the string, so the
  # escape is reported on line 2.
  code = "\"\nhello\\x world"
  expected = [
    [[1, 0], :on_tstring_beg, "\"", state(:EXPR_BEG)],
    [[1, 1], :on_tstring_content, "\n" "hello", state(:EXPR_BEG)],
    [[2, 5], :on_tstring_content, "\\x", state(:EXPR_BEG)],
    [[2, 7], :on_tstring_content, " world", state(:EXPR_BEG)],
  ]
  assert_lexer(expected, code)

  # "\C" not followed by "-".
  code = "\"\n\\Cxx\""
  expected = [
    [[1, 0], :on_tstring_beg, "\"", state(:EXPR_BEG)],
    [[1, 1], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\\Cx", state(:EXPR_BEG)],
    [[2, 3], :on_tstring_content, "x", state(:EXPR_BEG)],
    [[2, 4], :on_tstring_end, "\"", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)

  # "\M" not followed by "-".
  code = "\"\n\\Mxx\""
  expected = [
    [[1, 0], :on_tstring_beg, "\"", state(:EXPR_BEG)],
    [[1, 1], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\\Mx", state(:EXPR_BEG)],
    [[2, 3], :on_tstring_content, "x", state(:EXPR_BEG)],
    [[2, 4], :on_tstring_end, "\"", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)

  # Doubled control escape "\c\c": both are reported as one content token.
  code = "\"\n\\c\\cx\""
  expected = [
    [[1, 0], :on_tstring_beg, "\"", state(:EXPR_BEG)],
    [[1, 1], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\\c\\c", state(:EXPR_BEG)],
    [[2, 4], :on_tstring_content, "x", state(:EXPR_BEG)],
    [[2, 5], :on_tstring_end, "\"", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)

  # "\u" without hex digits.
  code = "\"\n\\ux\""
  expected = [
    [[1, 0], :on_tstring_beg, "\"", state(:EXPR_BEG)],
    [[1, 1], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\\u", state(:EXPR_BEG)],
    [[2, 2], :on_tstring_content, "x", state(:EXPR_BEG)],
    [[2, 3], :on_tstring_end, "\"", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)

  # "\x" without hex digits in a terminated string.
  code = "\"\n\\xx\""
  expected = [
    [[1, 0], :on_tstring_beg, "\"", state(:EXPR_BEG)],
    [[1, 1], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\\x", state(:EXPR_BEG)],
    [[2, 2], :on_tstring_content, "x", state(:EXPR_BEG)],
    [[2, 3], :on_tstring_end, "\"", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)

  # Invalid "\x" inside a heredoc body (no terminator line in the input).
  code = "<<A\n\n\\xyz"
  expected = [
    [[1, 0], :on_heredoc_beg, "<<A", state(:EXPR_BEG)],
    [[1, 3], :on_nl, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[3, 0], :on_tstring_content, "\\x", state(:EXPR_BEG)],
    [[3, 2], :on_tstring_content, "yz", state(:EXPR_BEG)],
  ]
  assert_lexer(expected, code)

  # Invalid "\x" inside a %( ) literal.
  code = "%(\n\\xyz)"
  expected = [
    [[1, 0], :on_tstring_beg, "%(", state(:EXPR_BEG)],
    [[1, 2], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\\x", state(:EXPR_BEG)],
    [[2, 2], :on_tstring_content, "yz", state(:EXPR_BEG)],
    [[2, 4], :on_tstring_end, ")", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)

  # Invalid "\x" inside a %Q( ) literal.
  code = "%Q(\n\\xyz)"
  expected = [
    [[1, 0], :on_tstring_beg, "%Q(", state(:EXPR_BEG)],
    [[1, 3], :on_tstring_content, "\n", state(:EXPR_BEG)],
    [[2, 0], :on_tstring_content, "\\x", state(:EXPR_BEG)],
    [[2, 2], :on_tstring_content, "yz", state(:EXPR_BEG)],
    [[2, 4], :on_tstring_end, ")", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)

  # Invalid "\x" inside a quoted symbol; content is lexed in EXPR_FNAME.
  code = ":\"\n\\xyz\""
  expected = [
    [[1, 0], :on_symbeg, ":\"", state(:EXPR_FNAME)],
    [[1, 2], :on_tstring_content, "\n", state(:EXPR_FNAME)],
    [[2, 0], :on_tstring_content, "\\x", state(:EXPR_FNAME)],
    [[2, 2], :on_tstring_content, "yz", state(:EXPR_FNAME)],
    [[2, 4], :on_tstring_end, "\"", state(:EXPR_END)],
  ]
  assert_lexer(expected, code)
end
|
||||||
|
|
||||||
def assert_lexer(expected, code)
|
def assert_lexer(expected, code)
|
||||||
assert_equal(code, Ripper.tokenize(code).join(""))
|
assert_equal(code, Ripper.tokenize(code).join(""))
|
||||||
assert_equal(expected, result = Ripper.lex(code),
|
assert_equal(expected, result = Ripper.lex(code),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user