string.c: improve String#scan

* string.c (scan_once, rb_str_scan): improve the performance by 50%
  for a string pattern, and by 10% for a regexp pattern.  avoid
  creating an intermediate MatchData, which is not used.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59496 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2017-08-04 04:39:53 +00:00
parent 8ba320a410
commit 2b770b4674

View File

@ -8564,35 +8564,49 @@ rb_str_strip(VALUE str)
} }
static VALUE static VALUE
scan_once(VALUE str, VALUE pat, long *start) scan_once(VALUE str, VALUE pat, long *start, int set_backref_str)
{ {
VALUE result, match; VALUE result, match;
struct re_registers *regs; struct re_registers *regs;
int i; int i;
long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
if (rb_pat_search(pat, str, *start, 1) >= 0) { if (pos >= 0) {
match = rb_backref_get(); if (BUILTIN_TYPE(pat) == T_STRING) {
regs = RMATCH_REGS(match); regs = NULL;
if (BEG(0) == END(0)) { end = pos + RSTRING_LEN(pat);
}
else {
match = rb_backref_get();
regs = RMATCH_REGS(match);
end = END(0);
}
if (pos == end) {
rb_encoding *enc = STR_ENC_GET(str); rb_encoding *enc = STR_ENC_GET(str);
/* /*
* Always consume at least one character of the input string * Always consume at least one character of the input string
*/ */
if (RSTRING_LEN(str) > END(0)) if (RSTRING_LEN(str) > end)
*start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0), *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
RSTRING_END(str), enc); RSTRING_END(str), enc);
else else
*start = END(0)+1; *start = end + 1;
} }
else { else {
*start = END(0); *start = end;
} }
if (regs->num_regs == 1) { if (!regs || regs->num_regs == 1) {
return rb_reg_nth_match(0, match); result = rb_str_subseq(str, pos, end - pos);
OBJ_INFECT(result, pat);
return result;
} }
result = rb_ary_new2(regs->num_regs); result = rb_ary_new2(regs->num_regs);
for (i=1; i < regs->num_regs; i++) { for (i=1; i < regs->num_regs; i++) {
rb_ary_push(result, rb_reg_nth_match(i, match)); VALUE s = Qnil;
if (BEG(i) >= 0) {
s = rb_str_subseq(str, BEG(i), END(i)-BEG(i));
OBJ_INFECT(s, pat);
}
rb_ary_push(result, s);
} }
return result; return result;
@ -8645,16 +8659,17 @@ rb_str_scan(VALUE str, VALUE pat)
if (!rb_block_given_p()) { if (!rb_block_given_p()) {
VALUE ary = rb_ary_new(); VALUE ary = rb_ary_new();
while (!NIL_P(result = scan_once(str, pat, &start))) { while (!NIL_P(result = scan_once(str, pat, &start, 0))) {
last = prev; last = prev;
prev = start; prev = start;
rb_ary_push(ary, result); rb_ary_push(ary, result);
} }
if (last >= 0) rb_pat_search(pat, str, last, 1); if (last >= 0) rb_pat_search(pat, str, last, 1);
else rb_backref_set(Qnil);
return ary; return ary;
} }
while (!NIL_P(result = scan_once(str, pat, &start))) { while (!NIL_P(result = scan_once(str, pat, &start, 1))) {
last = prev; last = prev;
prev = start; prev = start;
rb_yield(result); rb_yield(result);