Use UTF-8 encoding for literal extended regexps with UTF-8 characters in comments
Fixes [Bug #19455]
This commit is contained in:
parent
ec211ad54d
commit
a8ba1ddd78
Notes:
git
2023-04-24 02:28:24 +00:00
9
re.c
9
re.c
@@ -2948,7 +2948,11 @@ escape_asis:
|
||||
case '#':
|
||||
if (extended_mode && !in_char_class) {
|
||||
/* consume and ignore comment in extended regexp */
|
||||
while ((p < end) && ((c = *p++) != '\n'));
|
||||
while ((p < end) && ((c = *p++) != '\n')) {
|
||||
if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
|
||||
*encp = enc;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
rb_str_buf_cat(buf, (char *)&c, 1);
|
||||
@@ -2983,6 +2987,9 @@ escape_asis:
|
||||
switch (c = *p++) {
|
||||
default:
|
||||
if (!(c & 0x80)) break;
|
||||
if (!*encp && enc == rb_utf8_encoding()) {
|
||||
*encp = enc;
|
||||
}
|
||||
--p;
|
||||
/* fallthrough */
|
||||
case '\\':
|
||||
|
@@ -187,6 +187,13 @@ class TestRegexp < Test::Unit::TestCase
|
||||
RUBY
|
||||
end
|
||||
|
||||
def test_utf8_comment_in_usascii_extended_regexp_bug_19455
|
||||
assert_separately([], <<-RUBY)
|
||||
assert_equal(Encoding::UTF_8, /(?#\u1000)/x.encoding)
|
||||
assert_equal(Encoding::UTF_8, /#\u1000/x.encoding)
|
||||
RUBY
|
||||
end
|
||||
|
||||
def test_union
|
||||
assert_equal :ok, begin
|
||||
Regexp.union(
|
||||
|
Loading…
x
Reference in New Issue
Block a user