rb_str_{partition,rpartition}_m: Handle /\K/ in pattern
When the pattern given to String#partition or String#rpartition contains a /\K/ (lookbehind) operator, the methods return strings sliced at incorrect positions.

```
# without patch
"abcdbce".partition(/b\Kc/)  # => ["a", "c", "cdbce"]
"abcdbce".rpartition(/b\Kc/) # => ["abcd", "c", "ce"]
```

This patch fixes the problem by using BEG(0) instead of the return value of rb_reg_search.

```
# with patch
"abcdbce".partition(/b\Kc/)  # => ["ab", "c", "dbce"]
"abcdbce".rpartition(/b\Kc/) # => ["abcdb", "c", "e"]
```

As a side effect, this patch makes String#partition 2x faster when the pattern is a costly Regexp, because the Regexp search is now performed only once; the original implementation unintentionally performed it twice.

Fixes [Bug #17119]
This commit is contained in:
parent
69b5241c36
commit
5d71eed1a7
Notes:
git
2020-08-13 20:51:24 +09:00
45
string.c
45
string.c
@ -9940,11 +9940,14 @@ rb_str_partition(VALUE str, VALUE sep)
|
||||
|
||||
sep = get_pat_quoted(sep, 0);
|
||||
if (RB_TYPE_P(sep, T_REGEXP)) {
|
||||
pos = rb_reg_search(sep, str, 0, 0);
|
||||
if (pos < 0) {
|
||||
if (rb_reg_search(sep, str, 0, 0) < 0) {
|
||||
goto failed;
|
||||
}
|
||||
sep = rb_str_subpat(str, sep, INT2FIX(0));
|
||||
VALUE match = rb_backref_get();
|
||||
struct re_registers *regs = RMATCH_REGS(match);
|
||||
|
||||
pos = BEG(0);
|
||||
sep = rb_str_subseq(str, pos, END(0) - pos);
|
||||
}
|
||||
else {
|
||||
pos = rb_str_index(str, sep, 0);
|
||||
@ -9978,37 +9981,33 @@ static VALUE
|
||||
rb_str_rpartition(VALUE str, VALUE sep)
|
||||
{
|
||||
long pos = RSTRING_LEN(str);
|
||||
int regex = FALSE;
|
||||
|
||||
sep = get_pat_quoted(sep, 0);
|
||||
if (RB_TYPE_P(sep, T_REGEXP)) {
|
||||
pos = rb_reg_search(sep, str, pos, 1);
|
||||
regex = TRUE;
|
||||
if (rb_reg_search(sep, str, pos, 1) < 0) {
|
||||
goto failed;
|
||||
}
|
||||
VALUE match = rb_backref_get();
|
||||
struct re_registers *regs = RMATCH_REGS(match);
|
||||
|
||||
pos = BEG(0);
|
||||
sep = rb_str_subseq(str, pos, END(0) - pos);
|
||||
}
|
||||
else {
|
||||
VALUE tmp;
|
||||
|
||||
tmp = rb_check_string_type(sep);
|
||||
if (NIL_P(tmp)) {
|
||||
rb_raise(rb_eTypeError, "type mismatch: %s given",
|
||||
rb_obj_classname(sep));
|
||||
}
|
||||
sep = tmp;
|
||||
pos = rb_str_sublen(str, pos);
|
||||
pos = rb_str_rindex(str, sep, pos);
|
||||
if(pos < 0) {
|
||||
goto failed;
|
||||
}
|
||||
pos = rb_str_offset(str, pos);
|
||||
}
|
||||
if (pos < 0) {
|
||||
return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str));
|
||||
}
|
||||
if (regex) {
|
||||
sep = rb_reg_nth_match(0, rb_backref_get());
|
||||
}
|
||||
else {
|
||||
pos = rb_str_offset(str, pos);
|
||||
}
|
||||
|
||||
return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
|
||||
sep,
|
||||
rb_str_subseq(str, pos+RSTRING_LEN(sep),
|
||||
RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
|
||||
failed:
|
||||
return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2603,6 +2603,8 @@ CODE
|
||||
assert_equal("hello", hello, bug)
|
||||
|
||||
assert_equal(["", "", "foo"], "foo".partition(/^=*/))
|
||||
|
||||
assert_equal([S("ab"), S("c"), S("dbce")], S("abcdbce").partition(/b\Kc/))
|
||||
end
|
||||
|
||||
def test_rpartition
|
||||
@ -2627,6 +2629,8 @@ CODE
|
||||
hello = "hello"
|
||||
hello.rpartition("hi").map(&:upcase!)
|
||||
assert_equal("hello", hello, bug)
|
||||
|
||||
assert_equal([S("abcdb"), S("c"), S("e")], S("abcdbce").rpartition(/b\Kc/))
|
||||
end
|
||||
|
||||
def test_setter
|
||||
|
Loading…
x
Reference in New Issue
Block a user