Always issue deprecation warning when calling Regexp.new with 3rd positional argument

Previously, only certain values of the 3rd argument triggered a
deprecation warning.

First step for fix for bug #18797.  Support for the 3rd argument
will be removed after the release of Ruby 3.2.

Fix minor fallout discovered by the tests.

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
Jeremy Evans 2022-12-20 12:44:11 -08:00
parent 9dcee2d80e
commit 7e8fa06022
Notes: git 2022-12-22 19:50:45 +00:00
5 changed files with 87 additions and 63 deletions

View File

@ -216,7 +216,7 @@ module Racc
end end
i = ii i = ii
end end
Regexp.compile(map, nil, 'n') Regexp.compile(map, Regexp::NOENCODING)
end end
def set_table(entries, dummy, tbl, chk, ptr) def set_table(entries, dummy, tbl, chk, ptr)

24
re.c
View File

@ -3759,10 +3759,11 @@ struct reg_init_args {
static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args); static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags); static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);
/* /*
* call-seq: * call-seq:
* Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp * Regexp.new(string, options = 0, timeout: nil) -> regexp
* Regexp.new(regexp, timeout: nil) -> regexp * Regexp.new(regexp, timeout: nil) -> regexp
* *
* With argument +string+ given, returns a new regexp with the given string * With argument +string+ given, returns a new regexp with the given string
@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
* Regexp.new('foo', 'im') # => /foo/im * Regexp.new('foo', 'im') # => /foo/im
* *
* - The logical OR of one or more of the constants * - The logical OR of one or more of the constants
* Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE: * Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
* Regexp::NOENCODING:
* *
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i * Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x * Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m * Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
* Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE * flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
* Regexp.new('foo', flags) # => /foo/mix * Regexp.new('foo', flags) # => /foo/mix
* *
* - +nil+ or +false+, which is ignored. * - +nil+ or +false+, which is ignored.
* *
* If optional argument +n_flag+ if it is a string starts with
* <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is
* ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or
* +US-ASCII+, by its content.
*
* Regexp.new('foo', nil, 'n') # => /foo/n
* Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n
*
* If optional keyword argument +timeout+ is given, * If optional keyword argument +timeout+ is given,
* its float value overrides the timeout interval for the class, * its float value overrides the timeout interval for the class,
* Regexp.timeout. * Regexp.timeout.
@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs; VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
VALUE re = Qnil; VALUE re = Qnil;
rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs); argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
args->timeout = Qnil; args->timeout = Qnil;
if (!NIL_P(kwargs)) { if (!NIL_P(kwargs)) {
@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout); rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
} }
if (argc == 3) {
rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument");
}
if (RB_TYPE_P(src, T_REGEXP)) { if (RB_TYPE_P(src, T_REGEXP)) {
re = src; re = src;
@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
enc = rb_ascii8bit_encoding(); enc = rb_ascii8bit_encoding();
flags |= ARG_ENCODING_NONE; flags |= ARG_ENCODING_NONE;
} }
else {
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
}
} }
str = StringValue(src); str = StringValue(src);
} }

View File

@ -197,48 +197,50 @@ describe :regexp_new_string, shared: true do
end end
end end
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do ruby_version_is ""..."3.2" do
-> { it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII -> {
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/) Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
end }.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
-> { -> {
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/) }.should complain(/encoding option is ignored/)
end end
it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
-> { -> {
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/) }.should complain(/encoding option is ignored/)
end end
it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
end end
it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
a = "(?:[\x8E\xA1-\xFE])" a = "(?:[\x8E\xA1-\xFE])"
str = "\A(?:#{a}|x*)\z" str = "\A(?:#{a}|x*)\z"
Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
end
end end
describe "with escaped characters" do describe "with escaped characters" do
@ -598,8 +600,10 @@ describe :regexp_new_regexp, shared: true do
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
end end
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do ruby_version_is ''...'3.2' do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
end
end end
end end
end end

View File

@ -34,7 +34,7 @@ class Psych_Unit_Tests < Psych::TestCase
# [ruby-core:34969] # [ruby-core:34969]
def test_regexp_with_n def test_regexp_with_n
assert_cycle(Regexp.new('',0,'n')) assert_cycle(Regexp.new('',Regexp::NOENCODING))
end end
# #
# Tests modified from 00basic.t in Psych.pm # Tests modified from 00basic.t in Psych.pm

View File

@ -42,14 +42,14 @@ class TestRegexp < Test::Unit::TestCase
def test_yoshidam_net_20041111_1 def test_yoshidam_net_20041111_1
s = "[\xC2\xA0-\xC3\xBE]" s = "[\xC2\xA0-\xC3\xBE]"
r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")} r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")}
assert_match(r, "\xC3\xBE") assert_match(r, "\xC3\xBE")
end end
def test_yoshidam_net_20041111_2 def test_yoshidam_net_20041111_2
assert_raise(RegexpError) do assert_raise(RegexpError) do
s = "[\xFF-\xFF]".force_encoding("utf-8") s = "[\xFF-\xFF]".force_encoding("utf-8")
assert_warning(/ignored/) {Regexp.new(s, nil, "u")} assert_warning(/3\.3/) {Regexp.new(s, nil, "u")}
end end
end end
@ -646,13 +646,37 @@ class TestRegexp < Test::Unit::TestCase
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)}) assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)}) assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding) arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
assert_equal(//n, Regexp.new("", nil, "n"))
arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value assert_deprecated_warning('') do
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options) assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding)
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options) assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)])
assert_equal(//, Regexp.new(""))
assert_equal(//, Regexp.new("", timeout: 1))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1))
assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
end
assert_deprecated_warning(/3\.3/) do
assert_equal(//n, Regexp.new("", nil, "n"))
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
end
assert_raise(RegexpError) { Regexp.new(")(") } assert_raise(RegexpError) { Regexp.new(")(") }
assert_raise(RegexpError) { Regexp.new('[\\40000000000') } assert_raise(RegexpError) { Regexp.new('[\\40000000000') }