Always issue deprecation warning when calling Regexp.new with 3rd positional argument

Previously, only certain values of the 3rd argument triggered a
deprecation warning.

First step for fix for bug #18797.  Support for the 3rd argument
will be removed after the release of Ruby 3.2.

Fix minor fallout discovered by the tests.

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
Jeremy Evans 2022-12-20 12:44:11 -08:00
parent 9dcee2d80e
commit 7e8fa06022
Notes: git 2022-12-22 19:50:45 +00:00
5 changed files with 87 additions and 63 deletions

View File

@ -216,7 +216,7 @@ module Racc
end
i = ii
end
Regexp.compile(map, nil, 'n')
Regexp.compile(map, Regexp::NOENCODING)
end
def set_table(entries, dummy, tbl, chk, ptr)

24
re.c
View File

@ -3759,10 +3759,11 @@ struct reg_init_args {
static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);
/*
* call-seq:
* Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp
* Regexp.new(string, options = 0, timeout: nil) -> regexp
* Regexp.new(regexp, timeout: nil) -> regexp
*
* With argument +string+ given, returns a new regexp with the given string
@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
* Regexp.new('foo', 'im') # => /foo/im
*
* - The logical OR of one or more of the constants
* Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE:
* Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
* Regexp::NOENCODING:
*
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
* Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
* Regexp.new('foo', flags) # => /foo/mix
*
* - +nil+ or +false+, which is ignored.
*
* If optional argument +n_flag+ if it is a string starts with
* <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is
* ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or
* +US-ASCII+, by its content.
*
* Regexp.new('foo', nil, 'n') # => /foo/n
* Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n
*
* If optional keyword argument +timeout+ is given,
* its float value overrides the timeout interval for the class,
* Regexp.timeout.
@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
VALUE re = Qnil;
rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
args->timeout = Qnil;
if (!NIL_P(kwargs)) {
@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
}
if (argc == 3) {
rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument");
}
if (RB_TYPE_P(src, T_REGEXP)) {
re = src;
@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
enc = rb_ascii8bit_encoding();
flags |= ARG_ENCODING_NONE;
}
else {
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
}
}
str = StringValue(src);
}

View File

@ -197,48 +197,50 @@ describe :regexp_new_string, shared: true do
end
end
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
ruby_version_is ""..."3.2" do
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
end
it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
end
it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
a = "(?:[\x8E\xA1-\xFE])"
str = "\A(?:#{a}|x*)\z"
it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
a = "(?:[\x8E\xA1-\xFE])"
str = "\A(?:#{a}|x*)\z"
Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
end
end
describe "with escaped characters" do
@ -598,8 +600,10 @@ describe :regexp_new_regexp, shared: true do
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
end
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
ruby_version_is ''...'3.2' do
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
end
end
end
end

View File

@ -34,7 +34,7 @@ class Psych_Unit_Tests < Psych::TestCase
# [ruby-core:34969]
def test_regexp_with_n
assert_cycle(Regexp.new('',0,'n'))
assert_cycle(Regexp.new('',Regexp::NOENCODING))
end
#
# Tests modified from 00basic.t in Psych.pm

View File

@ -42,14 +42,14 @@ class TestRegexp < Test::Unit::TestCase
def test_yoshidam_net_20041111_1
s = "[\xC2\xA0-\xC3\xBE]"
r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")}
r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")}
assert_match(r, "\xC3\xBE")
end
def test_yoshidam_net_20041111_2
assert_raise(RegexpError) do
s = "[\xFF-\xFF]".force_encoding("utf-8")
assert_warning(/ignored/) {Regexp.new(s, nil, "u")}
assert_warning(/3\.3/) {Regexp.new(s, nil, "u")}
end
end
@ -646,13 +646,37 @@ class TestRegexp < Test::Unit::TestCase
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
assert_equal(//n, Regexp.new("", nil, "n"))
arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value
arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
assert_deprecated_warning('') do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding)
assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)])
assert_equal(//, Regexp.new(""))
assert_equal(//, Regexp.new("", timeout: 1))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1))
assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
end
assert_deprecated_warning(/3\.3/) do
assert_equal(//n, Regexp.new("", nil, "n"))
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
end
assert_raise(RegexpError) { Regexp.new(")(") }
assert_raise(RegexpError) { Regexp.new('[\\40000000000') }