From d89280e8bf6496aa83326b5f9c293724bd1cc1e9 Mon Sep 17 00:00:00 2001 From: Dustin Brown Date: Wed, 6 Dec 2023 19:25:29 -0800 Subject: [PATCH] Copy encoding flags when copying a regex [Bug #20039] * :bug: Fixes [Bug #20039](https://bugs.ruby-lang.org/issues/20039) When a Regexp is initialized with another Regexp, we simply copy the properties from the original. However, the encoding flags on the original were not being copied. This caused an issue when the original contained multibyte characters and was matched against a US-ASCII string: without the fixed-encoding flag (`KCODE_FIXED`) transferred onto the new Regexp, the match would fail with an error. The `REG_ENCODING_NONE` flag is now copied as well. See the included test for an example. Co-authored-by: Nobuyoshi Nakada --- re.c | 2 ++ test/ruby/test_regexp.rb | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/re.c b/re.c index 86090fc00f..d832fb3ba0 100644 --- a/re.c +++ b/re.c @@ -3853,6 +3853,8 @@ reg_copy(VALUE copy, VALUE orig) RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src); RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit; rb_enc_copy(copy, orig); + FL_SET_RAW(copy, FL_TEST_RAW(orig, KCODE_FIXED|REG_ENCODING_NONE)); + return copy; } diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 7dff4c25f2..4d94702502 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1936,6 +1936,16 @@ class TestRegexp < Test::Unit::TestCase assert_equal("123456789".match(/(?:x?\dx?){2,}/)[0], "123456789") end + def test_encoding_flags_are_preserved_when_initialized_with_another_regexp + re = Regexp.new("\u2018hello\u2019".encode("UTF-8")) + str = "".encode("US-ASCII") + + assert_nothing_raised do + str.match?(re) + str.match?(Regexp.new(re)) + end + end + def test_bug_19537 # [Bug #19537] str = 'aac' re = '^([ab]{1,3})(a?)*$'