[Bug #21186] multibyte char literal should be a single letter word

This commit is contained in:
Nobuyoshi Nakada 2025-03-17 21:37:00 +09:00 committed by Nobuyoshi Nakada
parent c85dffeee2
commit 1acfb29015
Notes: git 2025-03-17 14:55:29 +00:00
2 changed files with 8 additions and 7 deletions

13
parse.y
View File

@@ -9914,6 +9914,7 @@ parse_qmark(struct parser_params *p, int space_seen)
 rb_encoding *enc;
 register int c;
 rb_parser_string_t *lit;
+const char *start = p->lex.pcur;
 if (IS_END()) {
 SET_LEX_STATE(EXPR_VALUE);
@@ -9938,13 +9939,11 @@ parse_qmark(struct parser_params *p, int space_seen)
 }
 newtok(p);
 enc = p->enc;
-if (!parser_isascii(p)) {
-if (tokadd_mbchar(p, c) == -1) return 0;
-}
-else if ((rb_enc_isalnum(c, p->enc) || c == '_') &&
-!lex_eol_p(p) && is_identchar(p, p->lex.pcur, p->lex.pend, p->enc)) {
+int w = parser_precise_mbclen(p, start);
+if (is_identchar(p, start, p->lex.pend, p->enc) &&
+!(lex_eol_ptr_n_p(p, start, w) || !is_identchar(p, start + w, p->lex.pend, p->enc))) {
 if (space_seen) {
-const char *start = p->lex.pcur - 1, *ptr = start;
+const char *ptr = start;
 do {
 int n = parser_precise_mbclen(p, ptr);
 if (n < 0) return -1;
@@ -9972,7 +9971,7 @@ parse_qmark(struct parser_params *p, int space_seen)
 }
 }
 else {
-tokadd(p, c);
+if (tokadd_mbchar(p, c) == -1) return 0;
 }
 tokfix(p);
 lit = STR_NEW3(tok(p), toklen(p), enc, 0);

View File

@@ -657,6 +657,8 @@ class TestParse < Test::Unit::TestCase
 assert_equal("\u{1234}", eval('?\u{1234}'))
 assert_equal("\u{1234}", eval('?\u1234'))
 assert_syntax_error('?\u{41 42}', 'Multiple codepoints at single character literal')
+assert_syntax_error("?and", /unexpected '\?'/)
+assert_syntax_error("?\u1234and", /unexpected '\?'/)
 e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
 assert_not_match(/end-of-input/, e.message)