From e048a073a3cba04576b8f6a1673c283e4e20cd90 Mon Sep 17 00:00:00 2001 From: Shugo Maeda Date: Wed, 12 Jun 2024 11:35:53 +0900 Subject: [PATCH] Add MatchData#bytebegin and MatchData#byteend These methods return the byte-based offset of the beginning or end of the specified match. [Feature #20576] --- doc/matchdata/bytebegin.rdoc | 30 ++++++++++++++++++++++ doc/matchdata/byteend.rdoc | 30 ++++++++++++++++++++++ re.c | 50 ++++++++++++++++++++++++++++++++++++ test/ruby/test_regexp.rb | 10 ++++++++ 4 files changed, 120 insertions(+) create mode 100644 doc/matchdata/bytebegin.rdoc create mode 100644 doc/matchdata/byteend.rdoc diff --git a/doc/matchdata/bytebegin.rdoc b/doc/matchdata/bytebegin.rdoc new file mode 100644 index 0000000000..5b40a7ef73 --- /dev/null +++ b/doc/matchdata/bytebegin.rdoc @@ -0,0 +1,30 @@ +Returns the offset (in bytes) of the beginning of the specified match. + +When non-negative integer argument +n+ is given, +returns the offset of the beginning of the nth match: + + m = /(.)(.)(\d+)(\d)/.match("THX1138.") + # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> + m[0] # => "HX1138" + m.bytebegin(0) # => 1 + m[3] # => "113" + m.bytebegin(3) # => 3 + + m = /(т)(е)(с)/.match('тест') + # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> + m[0] # => "тес" + m.bytebegin(0) # => 0 + m[3] # => "с" + m.bytebegin(3) # => 4 + +When string or symbol argument +name+ is given, +returns the offset of the beginning for the named match: + + m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") + # => #<MatchData "hog" foo:"h" bar:"g"> + m[:foo] # => "h" + m.bytebegin('foo') # => 0 + m[:bar] # => "g" + m.bytebegin(:bar) # => 2 + +Related: MatchData#byteend, MatchData#byteoffset. diff --git a/doc/matchdata/byteend.rdoc b/doc/matchdata/byteend.rdoc new file mode 100644 index 0000000000..eb57664022 --- /dev/null +++ b/doc/matchdata/byteend.rdoc @@ -0,0 +1,30 @@ +Returns the offset (in bytes) of the end of the specified match. 
+ +When non-negative integer argument +n+ is given, +returns the offset of the end of the nth match: + + m = /(.)(.)(\d+)(\d)/.match("THX1138.") + # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> + m[0] # => "HX1138" + m.byteend(0) # => 7 + m[3] # => "113" + m.byteend(3) # => 6 + + m = /(т)(е)(с)/.match('тест') + # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> + m[0] # => "тес" + m.byteend(0) # => 6 + m[3] # => "с" + m.byteend(3) # => 6 + +When string or symbol argument +name+ is given, +returns the offset of the end for the named match: + + m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") + # => #<MatchData "hog" foo:"h" bar:"g"> + m[:foo] # => "h" + m.byteend('foo') # => 1 + m[:bar] # => "g" + m.byteend(:bar) # => 3 + +Related: MatchData#bytebegin, MatchData#byteoffset. diff --git a/re.c b/re.c index c8940ff887..a7985fe302 100644 --- a/re.c +++ b/re.c @@ -1296,6 +1296,54 @@ match_byteoffset(VALUE match, VALUE n) } +/* + * call-seq: + * bytebegin(n) -> integer + * bytebegin(name) -> integer + * + * :include: doc/matchdata/bytebegin.rdoc + * + */ + +static VALUE +match_bytebegin(VALUE match, VALUE n) +{ + int i = match_backref_number(match, n); + struct re_registers *regs = RMATCH_REGS(match); + + match_check(match); + backref_number_check(regs, i); + + if (BEG(i) < 0) + return Qnil; + return LONG2NUM(BEG(i)); +} + + +/* + * call-seq: + * byteend(n) -> integer + * byteend(name) -> integer + * + * :include: doc/matchdata/byteend.rdoc + * + */ + +static VALUE +match_byteend(VALUE match, VALUE n) +{ + int i = match_backref_number(match, n); + struct re_registers *regs = RMATCH_REGS(match); + + match_check(match); + backref_number_check(regs, i); + + if (BEG(i) < 0) + return Qnil; + return LONG2NUM(END(i)); +} + + /* * call-seq: * begin(n) -> integer @@ -4842,6 +4890,8 @@ Init_Regexp(void) rb_define_method(rb_cMatch, "length", match_size, 0); rb_define_method(rb_cMatch, "offset", match_offset, 1); rb_define_method(rb_cMatch, "byteoffset", match_byteoffset, 1); + rb_define_method(rb_cMatch, "bytebegin", match_bytebegin, 1); + rb_define_method(rb_cMatch, "byteend", match_byteend, 1); 
rb_define_method(rb_cMatch, "begin", match_begin, 1); rb_define_method(rb_cMatch, "end", match_end, 1); rb_define_method(rb_cMatch, "match", match_nth, 1); diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 828117f516..04e24b2ded 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -559,16 +559,26 @@ class TestRegexp < Test::Unit::TestCase assert_raise(IndexError) { m.byteoffset(2) } assert_raise(IndexError) { m.begin(2) } assert_raise(IndexError) { m.end(2) } + assert_raise(IndexError) { m.bytebegin(2) } + assert_raise(IndexError) { m.byteend(2) } m = /(?<x>q..)?/.match("foobarbaz") assert_equal([nil, nil], m.byteoffset("x")) assert_equal(nil, m.begin("x")) assert_equal(nil, m.end("x")) + assert_equal(nil, m.bytebegin("x")) + assert_equal(nil, m.byteend("x")) m = /\A\u3042(.)(.)?(.)\z/.match("\u3042\u3043\u3044") assert_equal([3, 6], m.byteoffset(1)) + assert_equal(3, m.bytebegin(1)) + assert_equal(6, m.byteend(1)) assert_equal([nil, nil], m.byteoffset(2)) + assert_equal(nil, m.bytebegin(2)) + assert_equal(nil, m.byteend(2)) assert_equal([6, 9], m.byteoffset(3)) + assert_equal(6, m.bytebegin(3)) + assert_equal(9, m.byteend(3)) end def test_match_to_s