Always issue deprecation warning when calling Regexp.new with 3rd positional argument

Previously, only certain values of the 3rd argument triggered a
deprecation warning.

First step for fix for bug #18797.  Support for the 3rd argument
will be removed after the release of Ruby 3.2.

Fix minor fallout discovered by the tests.

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
Jeremy Evans 2022-12-20 12:44:11 -08:00
parent 9dcee2d80e
commit 7e8fa06022
Notes: git 2022-12-22 19:50:45 +00:00
5 changed files with 87 additions and 63 deletions

View File

@ -216,7 +216,7 @@ module Racc
end end
i = ii i = ii
end end
Regexp.compile(map, nil, 'n') Regexp.compile(map, Regexp::NOENCODING)
end end
def set_table(entries, dummy, tbl, chk, ptr) def set_table(entries, dummy, tbl, chk, ptr)

24
re.c
View File

@ -3759,10 +3759,11 @@ struct reg_init_args {
static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args); static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags); static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);
/* /*
* call-seq: * call-seq:
* Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp * Regexp.new(string, options = 0, timeout: nil) -> regexp
* Regexp.new(regexp, timeout: nil) -> regexp * Regexp.new(regexp, timeout: nil) -> regexp
* *
* With argument +string+ given, returns a new regexp with the given string * With argument +string+ given, returns a new regexp with the given string
@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
* Regexp.new('foo', 'im') # => /foo/im * Regexp.new('foo', 'im') # => /foo/im
* *
* - The logical OR of one or more of the constants * - The logical OR of one or more of the constants
* Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE: * Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
* Regexp::NOENCODING:
* *
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i * Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x * Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m * Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
* Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE * flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
* Regexp.new('foo', flags) # => /foo/mix * Regexp.new('foo', flags) # => /foo/mix
* *
* - +nil+ or +false+, which is ignored. * - +nil+ or +false+, which is ignored.
* *
* If optional argument +n_flag+ if it is a string starts with
* <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is
* ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or
* +US-ASCII+, by its content.
*
* Regexp.new('foo', nil, 'n') # => /foo/n
* Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n
*
* If optional keyword argument +timeout+ is given, * If optional keyword argument +timeout+ is given,
* its float value overrides the timeout interval for the class, * its float value overrides the timeout interval for the class,
* Regexp.timeout. * Regexp.timeout.
@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs; VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
VALUE re = Qnil; VALUE re = Qnil;
rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs); argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
args->timeout = Qnil; args->timeout = Qnil;
if (!NIL_P(kwargs)) { if (!NIL_P(kwargs)) {
@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout); rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
} }
if (argc == 3) {
rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument");
}
if (RB_TYPE_P(src, T_REGEXP)) { if (RB_TYPE_P(src, T_REGEXP)) {
re = src; re = src;
@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
enc = rb_ascii8bit_encoding(); enc = rb_ascii8bit_encoding();
flags |= ARG_ENCODING_NONE; flags |= ARG_ENCODING_NONE;
} }
else {
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
}
} }
str = StringValue(src); str = StringValue(src);
} }

View File

@ -197,6 +197,7 @@ describe :regexp_new_string, shared: true do
end end
end end
ruby_version_is ""..."3.2" do
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
-> { -> {
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
@ -240,6 +241,7 @@ describe :regexp_new_string, shared: true do
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
end end
end
describe "with escaped characters" do describe "with escaped characters" do
it "raises a Regexp error if there is a trailing backslash" do it "raises a Regexp error if there is a trailing backslash" do
@ -598,8 +600,10 @@ describe :regexp_new_regexp, shared: true do
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
end end
ruby_version_is ''...'3.2' do
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
end end
end end
end
end end

View File

@ -34,7 +34,7 @@ class Psych_Unit_Tests < Psych::TestCase
# [ruby-core:34969] # [ruby-core:34969]
def test_regexp_with_n def test_regexp_with_n
assert_cycle(Regexp.new('',0,'n')) assert_cycle(Regexp.new('',Regexp::NOENCODING))
end end
# #
# Tests modified from 00basic.t in Psych.pm # Tests modified from 00basic.t in Psych.pm

View File

@ -42,14 +42,14 @@ class TestRegexp < Test::Unit::TestCase
def test_yoshidam_net_20041111_1 def test_yoshidam_net_20041111_1
s = "[\xC2\xA0-\xC3\xBE]" s = "[\xC2\xA0-\xC3\xBE]"
r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")} r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")}
assert_match(r, "\xC3\xBE") assert_match(r, "\xC3\xBE")
end end
def test_yoshidam_net_20041111_2 def test_yoshidam_net_20041111_2
assert_raise(RegexpError) do assert_raise(RegexpError) do
s = "[\xFF-\xFF]".force_encoding("utf-8") s = "[\xFF-\xFF]".force_encoding("utf-8")
assert_warning(/ignored/) {Regexp.new(s, nil, "u")} assert_warning(/3\.3/) {Regexp.new(s, nil, "u")}
end end
end end
@ -646,13 +646,37 @@ class TestRegexp < Test::Unit::TestCase
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)}) assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)}) assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding) arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
assert_equal(//n, Regexp.new("", nil, "n"))
arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value assert_deprecated_warning('') do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding)
assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)])
assert_equal(//, Regexp.new(""))
assert_equal(//, Regexp.new("", timeout: 1))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1))
assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
end
assert_deprecated_warning(/3\.3/) do
assert_equal(//n, Regexp.new("", nil, "n"))
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options) assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options) assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
end
assert_raise(RegexpError) { Regexp.new(")(") } assert_raise(RegexpError) { Regexp.new(")(") }
assert_raise(RegexpError) { Regexp.new('[\\40000000000') } assert_raise(RegexpError) { Regexp.new('[\\40000000000') }