Always issue deprecation warning when calling Regexp.new with 3rd positional argument
Previously, only certain values of the 3rd argument triggered a deprecation warning. First step for fix for bug #18797. Support for the 3rd argument will be removed after the release of Ruby 3.2. Fix minor fallout discovered by the tests. Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
parent
9dcee2d80e
commit
7e8fa06022
Notes:
git
2022-12-22 19:50:45 +00:00
@ -216,7 +216,7 @@ module Racc
|
|||||||
end
|
end
|
||||||
i = ii
|
i = ii
|
||||||
end
|
end
|
||||||
Regexp.compile(map, nil, 'n')
|
Regexp.compile(map, Regexp::NOENCODING)
|
||||||
end
|
end
|
||||||
|
|
||||||
def set_table(entries, dummy, tbl, chk, ptr)
|
def set_table(entries, dummy, tbl, chk, ptr)
|
||||||
|
24
re.c
24
re.c
@ -3759,10 +3759,11 @@ struct reg_init_args {
|
|||||||
|
|
||||||
static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
|
static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
|
||||||
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
|
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
|
||||||
|
void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp
|
* Regexp.new(string, options = 0, timeout: nil) -> regexp
|
||||||
* Regexp.new(regexp, timeout: nil) -> regexp
|
* Regexp.new(regexp, timeout: nil) -> regexp
|
||||||
*
|
*
|
||||||
* With argument +string+ given, returns a new regexp with the given string
|
* With argument +string+ given, returns a new regexp with the given string
|
||||||
@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
|
|||||||
* Regexp.new('foo', 'im') # => /foo/im
|
* Regexp.new('foo', 'im') # => /foo/im
|
||||||
*
|
*
|
||||||
* - The logical OR of one or more of the constants
|
* - The logical OR of one or more of the constants
|
||||||
* Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE:
|
* Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
|
||||||
|
* Regexp::NOENCODING:
|
||||||
*
|
*
|
||||||
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
|
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
|
||||||
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
|
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
|
||||||
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
|
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
|
||||||
|
* Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
|
||||||
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
|
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
|
||||||
* Regexp.new('foo', flags) # => /foo/mix
|
* Regexp.new('foo', flags) # => /foo/mix
|
||||||
*
|
*
|
||||||
* - +nil+ or +false+, which is ignored.
|
* - +nil+ or +false+, which is ignored.
|
||||||
*
|
*
|
||||||
* If optional argument +n_flag+ if it is a string starts with
|
|
||||||
* <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is
|
|
||||||
* ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or
|
|
||||||
* +US-ASCII+, by its content.
|
|
||||||
*
|
|
||||||
* Regexp.new('foo', nil, 'n') # => /foo/n
|
|
||||||
* Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n
|
|
||||||
*
|
|
||||||
* If optional keyword argument +timeout+ is given,
|
* If optional keyword argument +timeout+ is given,
|
||||||
* its float value overrides the timeout interval for the class,
|
* its float value overrides the timeout interval for the class,
|
||||||
* Regexp.timeout.
|
* Regexp.timeout.
|
||||||
@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
|
|||||||
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
|
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
|
||||||
VALUE re = Qnil;
|
VALUE re = Qnil;
|
||||||
|
|
||||||
rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
|
argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
|
||||||
|
|
||||||
args->timeout = Qnil;
|
args->timeout = Qnil;
|
||||||
if (!NIL_P(kwargs)) {
|
if (!NIL_P(kwargs)) {
|
||||||
@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
|
|||||||
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
|
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (argc == 3) {
|
||||||
|
rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument");
|
||||||
|
}
|
||||||
|
|
||||||
if (RB_TYPE_P(src, T_REGEXP)) {
|
if (RB_TYPE_P(src, T_REGEXP)) {
|
||||||
re = src;
|
re = src;
|
||||||
|
|
||||||
@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
|
|||||||
enc = rb_ascii8bit_encoding();
|
enc = rb_ascii8bit_encoding();
|
||||||
flags |= ARG_ENCODING_NONE;
|
flags |= ARG_ENCODING_NONE;
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
str = StringValue(src);
|
str = StringValue(src);
|
||||||
}
|
}
|
||||||
|
@ -197,48 +197,50 @@ describe :regexp_new_string, shared: true do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
|
ruby_version_is ""..."3.2" do
|
||||||
-> {
|
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
|
||||||
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
|
-> {
|
||||||
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
|
||||||
}.should complain(/encoding option is ignored/)
|
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
|
||||||
end
|
}.should complain(/encoding option is ignored/)
|
||||||
|
end
|
||||||
|
|
||||||
it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
|
it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
|
||||||
-> {
|
-> {
|
||||||
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
|
||||||
}.should complain(/encoding option is ignored/)
|
}.should complain(/encoding option is ignored/)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
|
it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
|
||||||
-> {
|
-> {
|
||||||
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
|
||||||
}.should complain(/encoding option is ignored/)
|
}.should complain(/encoding option is ignored/)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
|
it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
|
||||||
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
|
||||||
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
|
||||||
end
|
end
|
||||||
|
|
||||||
it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
|
it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
|
||||||
a = "(?:[\x8E\xA1-\xFE])"
|
a = "(?:[\x8E\xA1-\xFE])"
|
||||||
str = "\A(?:#{a}|x*)\z"
|
str = "\A(?:#{a}|x*)\z"
|
||||||
|
|
||||||
Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
|
Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
|
||||||
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
|
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
|
||||||
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
|
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
|
||||||
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
|
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "with escaped characters" do
|
describe "with escaped characters" do
|
||||||
@ -598,8 +600,10 @@ describe :regexp_new_regexp, shared: true do
|
|||||||
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
|
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
|
||||||
end
|
end
|
||||||
|
|
||||||
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
|
ruby_version_is ''...'3.2' do
|
||||||
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
|
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
|
||||||
|
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -34,7 +34,7 @@ class Psych_Unit_Tests < Psych::TestCase
|
|||||||
|
|
||||||
# [ruby-core:34969]
|
# [ruby-core:34969]
|
||||||
def test_regexp_with_n
|
def test_regexp_with_n
|
||||||
assert_cycle(Regexp.new('',0,'n'))
|
assert_cycle(Regexp.new('',Regexp::NOENCODING))
|
||||||
end
|
end
|
||||||
#
|
#
|
||||||
# Tests modified from 00basic.t in Psych.pm
|
# Tests modified from 00basic.t in Psych.pm
|
||||||
|
@ -42,14 +42,14 @@ class TestRegexp < Test::Unit::TestCase
|
|||||||
|
|
||||||
def test_yoshidam_net_20041111_1
|
def test_yoshidam_net_20041111_1
|
||||||
s = "[\xC2\xA0-\xC3\xBE]"
|
s = "[\xC2\xA0-\xC3\xBE]"
|
||||||
r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")}
|
r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")}
|
||||||
assert_match(r, "\xC3\xBE")
|
assert_match(r, "\xC3\xBE")
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_yoshidam_net_20041111_2
|
def test_yoshidam_net_20041111_2
|
||||||
assert_raise(RegexpError) do
|
assert_raise(RegexpError) do
|
||||||
s = "[\xFF-\xFF]".force_encoding("utf-8")
|
s = "[\xFF-\xFF]".force_encoding("utf-8")
|
||||||
assert_warning(/ignored/) {Regexp.new(s, nil, "u")}
|
assert_warning(/3\.3/) {Regexp.new(s, nil, "u")}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -646,13 +646,37 @@ class TestRegexp < Test::Unit::TestCase
|
|||||||
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
|
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
|
||||||
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})
|
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})
|
||||||
|
|
||||||
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
|
arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value
|
||||||
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
|
|
||||||
assert_equal(//n, Regexp.new("", nil, "n"))
|
|
||||||
|
|
||||||
arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value
|
assert_deprecated_warning('') do
|
||||||
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
|
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding)
|
||||||
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
|
assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)])
|
||||||
|
assert_equal(//, Regexp.new(""))
|
||||||
|
assert_equal(//, Regexp.new("", timeout: 1))
|
||||||
|
assert_equal(//n, Regexp.new("", Regexp::NOENCODING))
|
||||||
|
assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1))
|
||||||
|
|
||||||
|
assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options)
|
||||||
|
end
|
||||||
|
|
||||||
|
assert_deprecated_warning(/3\.3/) do
|
||||||
|
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
|
||||||
|
end
|
||||||
|
assert_deprecated_warning(/3\.3/) do
|
||||||
|
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding)
|
||||||
|
end
|
||||||
|
assert_deprecated_warning(/3\.3/) do
|
||||||
|
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
|
||||||
|
end
|
||||||
|
assert_deprecated_warning(/3\.3/) do
|
||||||
|
assert_equal(//n, Regexp.new("", nil, "n"))
|
||||||
|
end
|
||||||
|
assert_deprecated_warning(/3\.3/) do
|
||||||
|
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
|
||||||
|
end
|
||||||
|
assert_deprecated_warning(/3\.3/) do
|
||||||
|
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
|
||||||
|
end
|
||||||
|
|
||||||
assert_raise(RegexpError) { Regexp.new(")(") }
|
assert_raise(RegexpError) { Regexp.new(")(") }
|
||||||
assert_raise(RegexpError) { Regexp.new('[\\40000000000') }
|
assert_raise(RegexpError) { Regexp.new('[\\40000000000') }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user