[Bug #20517] Make a multibyte character one token at meta escape

This commit is contained in:
Nobuyoshi Nakada 2024-06-01 19:33:12 +09:00
parent cda69b5910
commit 05553cf22d
No known key found for this signature in database
GPG Key ID: 3582D74E1FEE4465
2 changed files with 56 additions and 4 deletions

View File

@ -8155,7 +8155,11 @@ read_escape(struct parser_params *p, int flags)
}
return read_escape(p, flags|ESCAPE_META) | 0x80;
}
else if (c == -1 || !ISASCII(c)) goto eof;
else if (c == -1) goto eof;
else if (!ISASCII(c)) {
tokskip_mbchar(p);
goto eof;
}
else {
int c2 = escaped_control_code(c);
if (c2) {

View File

@ -302,9 +302,8 @@ world"
[[6, 2], :on_tstring_content, "3\n", state(:EXPR_BEG)],
[[7, 0], :on_heredoc_end, "H1\n", state(:EXPR_BEG)],
]
assert_equal(code, Ripper.tokenize(code).join(""))
assert_equal(expected, result = Ripper.lex(code),
proc {expected.zip(result) {|e, r| break diff(e, r) unless e == r}})
assert_lexer(expected, code)
code = <<~'HEREDOC'
<<-H1
@ -330,6 +329,55 @@ world"
[[6, 0], :on_tstring_content, " 3\n", state(:EXPR_BEG)],
[[7, 0], :on_heredoc_end, "H1\n", state(:EXPR_BEG)],
]
assert_lexer(expected, code)
end
# An invalid "\C-" escape followed by the multibyte character U+3042 must be
# lexed as one tstring_content token, leaving the closing quote at column 7.
def test_invalid_escape_ctrl_mbchar
  source = %["\\C-\u{3042}"]
  assert_lexer(
    [
      [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
      # Backslash escape and the multibyte character stay together.
      [[1, 1], :on_tstring_content, "\\C-\u{3042}", state(:EXPR_BEG)],
      [[1, 7], :on_tstring_end, '"', state(:EXPR_END)],
    ],
    source
  )
end
# An invalid "\M-" escape followed by the multibyte character U+3042 must be
# lexed as one tstring_content token, leaving the closing quote at column 7.
def test_invalid_escape_meta_mbchar
  source = %["\\M-\u{3042}"]
  assert_lexer(
    [
      [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
      # Backslash escape and the multibyte character stay together.
      [[1, 1], :on_tstring_content, "\\M-\u{3042}", state(:EXPR_BEG)],
      [[1, 7], :on_tstring_end, '"', state(:EXPR_END)],
    ],
    source
  )
end
# A nested "\M-\C-" escape applied to the multibyte character U+3042 is invalid;
# the whole sequence must still form one tstring_content token, leaving the
# closing quote at column 10.
def test_invalid_escape_meta_ctrl_mbchar
  source = %["\\M-\\C-\u{3042}"]
  assert_lexer(
    [
      [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
      # Both escape prefixes and the multibyte character stay together.
      [[1, 1], :on_tstring_content, "\\M-\\C-\u{3042}", state(:EXPR_BEG)],
      [[1, 10], :on_tstring_end, '"', state(:EXPR_END)],
    ],
    source
  )
end
# A nested "\C-\M-" escape applied to the multibyte character U+3042 is invalid;
# the whole sequence must still form one tstring_content token, leaving the
# closing quote at column 10.
def test_invalid_escape_ctrl_meta_mbchar
  source = %["\\C-\\M-\u{3042}"]
  assert_lexer(
    [
      [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
      # Both escape prefixes and the multibyte character stay together.
      [[1, 1], :on_tstring_content, "\\C-\\M-\u{3042}", state(:EXPR_BEG)],
      [[1, 10], :on_tstring_end, '"', state(:EXPR_END)],
    ],
    source
  )
end
def assert_lexer(expected, code)
assert_equal(code, Ripper.tokenize(code).join(""))
assert_equal(expected, result = Ripper.lex(code),
proc {expected.zip(result) {|e, r| break diff(e, r) unless e == r}})