string.c: split with block
* string.c (rb_str_split_m): yield each split substring if a block is given, instead of returning the array. [Feature #4780] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62763 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
d13a2d498c
commit
2258a97fe2
4
NEWS
4
NEWS
@ -93,6 +93,10 @@ with all sufficient information, see the ChangeLog file or Redmine
|
|||||||
|
|
||||||
* added Random.bytes. [Feature #4938]
|
* added Random.bytes. [Feature #4938]
|
||||||
|
|
||||||
|
* String
|
||||||
|
|
||||||
|
* String#split yields each substring to the block if given. [Feature #4780]
|
||||||
|
|
||||||
=== Stdlib updates (outstanding ones only)
|
=== Stdlib updates (outstanding ones only)
|
||||||
|
|
||||||
* ERB
|
* ERB
|
||||||
|
82
string.c
82
string.c
@ -7602,6 +7602,35 @@ static const char isspacetable[256] = {
|
|||||||
|
|
||||||
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
|
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
|
||||||
|
|
||||||
|
/*
 * Emit one piece of a split: the substring of +str+ that starts at +beg+
 * and is +len+ long.  The piece is pushed onto +result+ when +result+ is
 * truthy; otherwise it is passed to the block with rb_yield.
 *
 * +empty_count+ controls how empty pieces are handled:
 *   - negative: nothing is deferred; every piece, empty or not, is emitted
 *     immediately.
 *   - zero or positive: an empty piece (+len+ == 0) is not emitted; the
 *     call only returns the count incremented by one.  Those pending empty
 *     pieces are emitted, each as a separate empty string, just before the
 *     next piece whose length is not zero.  Empties still pending after the
 *     last call are therefore never emitted.
 *
 * Returns the value the caller should pass as +empty_count+ on the next
 * call.
 */
static long
|
||||||
|
split_string(VALUE result, VALUE str, long beg, long len, long empty_count)
|
||||||
|
{
|
||||||
|
/* deferral enabled and the piece is empty: just count it */
if (empty_count >= 0 && len == 0) {
|
||||||
|
return empty_count + 1;
|
||||||
|
}
|
||||||
|
/* a non-empty piece follows: flush the empties counted so far */
if (empty_count > 0) {
|
||||||
|
/* make different substrings */
|
||||||
|
if (result) {
|
||||||
|
do {
|
||||||
|
rb_ary_push(result, str_new_empty(str));
|
||||||
|
} while (--empty_count > 0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
do {
|
||||||
|
rb_yield(str_new_empty(str));
|
||||||
|
} while (--empty_count > 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* either flush loop above leaves empty_count at 0 */
str = rb_str_subseq(str, beg, len);
|
||||||
|
if (result) {
|
||||||
|
rb_ary_push(result, str);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rb_yield(str);
|
||||||
|
}
|
||||||
|
return empty_count;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.split(pattern=nil, [limit]) -> an_array
|
* str.split(pattern=nil, [limit]) -> an_array
|
||||||
@ -7660,20 +7689,27 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||||||
VALUE spat;
|
VALUE spat;
|
||||||
VALUE limit;
|
VALUE limit;
|
||||||
enum {awk, string, regexp} split_type;
|
enum {awk, string, regexp} split_type;
|
||||||
long beg, end, i = 0;
|
long beg, end, i = 0, empty_count = -1;
|
||||||
int lim = 0;
|
int lim = 0;
|
||||||
VALUE result, tmp;
|
VALUE result, tmp;
|
||||||
|
|
||||||
|
result = rb_block_given_p() ? Qfalse : Qnil;
|
||||||
if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
|
if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
|
||||||
lim = NUM2INT(limit);
|
lim = NUM2INT(limit);
|
||||||
if (lim <= 0) limit = Qnil;
|
if (lim <= 0) limit = Qnil;
|
||||||
else if (lim == 1) {
|
else if (lim == 1) {
|
||||||
if (RSTRING_LEN(str) == 0)
|
if (RSTRING_LEN(str) == 0)
|
||||||
return rb_ary_new2(0);
|
return result ? rb_ary_new2(0) : str;
|
||||||
return rb_ary_new3(1, rb_str_dup(str));
|
tmp = rb_str_dup(str);
|
||||||
|
if (!result) {
|
||||||
|
rb_yield(tmp);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
return rb_ary_new3(1, tmp);
|
||||||
}
|
}
|
||||||
i = 1;
|
i = 1;
|
||||||
}
|
}
|
||||||
|
if (NIL_P(limit) && !lim) empty_count = 0;
|
||||||
|
|
||||||
enc = STR_ENC_GET(str);
|
enc = STR_ENC_GET(str);
|
||||||
split_type = regexp;
|
split_type = regexp;
|
||||||
@ -7712,7 +7748,9 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result = rb_ary_new();
|
#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
|
||||||
|
|
||||||
|
if (result) result = rb_ary_new();
|
||||||
beg = 0;
|
beg = 0;
|
||||||
if (split_type == awk) {
|
if (split_type == awk) {
|
||||||
char *ptr = RSTRING_PTR(str);
|
char *ptr = RSTRING_PTR(str);
|
||||||
@ -7736,7 +7774,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (ascii_isspace(c)) {
|
else if (ascii_isspace(c)) {
|
||||||
rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
|
SPLIT_STR(beg, end-beg);
|
||||||
skip = 1;
|
skip = 1;
|
||||||
beg = ptr - bptr;
|
beg = ptr - bptr;
|
||||||
if (!NIL_P(limit)) ++i;
|
if (!NIL_P(limit)) ++i;
|
||||||
@ -7763,7 +7801,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (rb_isspace(c)) {
|
else if (rb_isspace(c)) {
|
||||||
rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
|
SPLIT_STR(beg, end-beg);
|
||||||
skip = 1;
|
skip = 1;
|
||||||
beg = ptr - bptr;
|
beg = ptr - bptr;
|
||||||
if (!NIL_P(limit)) ++i;
|
if (!NIL_P(limit)) ++i;
|
||||||
@ -7792,8 +7830,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||||||
ptr = t;
|
ptr = t;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
rb_ary_push(result, rb_str_subseq(str, substr_start - str_start,
|
SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
|
||||||
(ptr+end) - substr_start));
|
|
||||||
ptr += end + slen;
|
ptr += end + slen;
|
||||||
substr_start = ptr;
|
substr_start = ptr;
|
||||||
if (!NIL_P(limit) && lim <= ++i) break;
|
if (!NIL_P(limit) && lim <= ++i) break;
|
||||||
@ -7812,14 +7849,11 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||||||
regs = RMATCH_REGS(rb_backref_get());
|
regs = RMATCH_REGS(rb_backref_get());
|
||||||
if (start == end && BEG(0) == END(0)) {
|
if (start == end && BEG(0) == END(0)) {
|
||||||
if (!ptr) {
|
if (!ptr) {
|
||||||
rb_ary_push(result, str_new_empty(str));
|
SPLIT_STR(0, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (last_null == 1) {
|
else if (last_null == 1) {
|
||||||
rb_ary_push(result, rb_str_subseq(str, beg,
|
SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, ptr+len, enc));
|
||||||
rb_enc_fast_mbclen(ptr+beg,
|
|
||||||
ptr+len,
|
|
||||||
enc)));
|
|
||||||
beg = start;
|
beg = start;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -7832,37 +7866,23 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
|
SPLIT_STR(beg, end-beg);
|
||||||
beg = start = END(0);
|
beg = start = END(0);
|
||||||
}
|
}
|
||||||
last_null = 0;
|
last_null = 0;
|
||||||
|
|
||||||
for (idx=1; idx < regs->num_regs; idx++) {
|
for (idx=1; idx < regs->num_regs; idx++) {
|
||||||
if (BEG(idx) == -1) continue;
|
if (BEG(idx) == -1) continue;
|
||||||
if (BEG(idx) == END(idx))
|
SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
|
||||||
tmp = str_new_empty(str);
|
|
||||||
else
|
|
||||||
tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
|
|
||||||
rb_ary_push(result, tmp);
|
|
||||||
}
|
}
|
||||||
if (!NIL_P(limit) && lim <= ++i) break;
|
if (!NIL_P(limit) && lim <= ++i) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
|
if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
|
||||||
if (RSTRING_LEN(str) == beg)
|
SPLIT_STR(beg, RSTRING_LEN(str)-beg);
|
||||||
tmp = str_new_empty(str);
|
|
||||||
else
|
|
||||||
tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
|
|
||||||
rb_ary_push(result, tmp);
|
|
||||||
}
|
|
||||||
if (NIL_P(limit) && lim == 0) {
|
|
||||||
long len;
|
|
||||||
while ((len = RARRAY_LEN(result)) > 0 &&
|
|
||||||
(tmp = RARRAY_AREF(result, len-1), RSTRING_LEN(tmp) == 0))
|
|
||||||
rb_ary_pop(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result ? result : str;
|
||||||
}
|
}
|
||||||
|
|
||||||
VALUE
|
VALUE
|
||||||
|
@ -1699,7 +1699,46 @@ CODE
|
|||||||
assert_equal([S("a"), S(""), S("b"), S("c"), S("")], S("a||b|c|").split(S('|'), -1))
|
assert_equal([S("a"), S(""), S("b"), S("c"), S("")], S("a||b|c|").split(S('|'), -1))
|
||||||
|
|
||||||
assert_equal([], "".split(//, 1))
|
assert_equal([], "".split(//, 1))
|
||||||
|
ensure
|
||||||
|
$; = fs
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_split_with_block
|
||||||
|
fs, $; = $;, nil
|
||||||
|
result = []; S(" a b\t c ").split {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b"), S("c")], result)
|
||||||
|
result = []; S(" a b\t c ").split(S(" ")) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b"), S("c")], result)
|
||||||
|
|
||||||
|
result = []; S(" a | b | c ").split(S("|")) {|s| result << s}
|
||||||
|
assert_equal([S(" a "), S(" b "), S(" c ")], result)
|
||||||
|
|
||||||
|
result = []; S("aXXbXXcXX").split(/X./) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b"), S("c")], result)
|
||||||
|
|
||||||
|
result = []; S("abc").split(//) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b"), S("c")], result)
|
||||||
|
|
||||||
|
result = []; S("a|b|c").split(S('|'), 1) {|s| result << s}
|
||||||
|
assert_equal([S("a|b|c")], result)
|
||||||
|
|
||||||
|
result = []; S("a|b|c").split(S('|'), 2) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b|c")], result)
|
||||||
|
result = []; S("a|b|c").split(S('|'), 3) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b"), S("c")], result)
|
||||||
|
|
||||||
|
result = []; S("a|b|c|").split(S('|'), -1) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b"), S("c"), S("")], result)
|
||||||
|
result = []; S("a|b|c||").split(S('|'), -1) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S("b"), S("c"), S(""), S("")], result)
|
||||||
|
|
||||||
|
result = []; S("a||b|c|").split(S('|')) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S(""), S("b"), S("c")], result)
|
||||||
|
result = []; S("a||b|c|").split(S('|'), -1) {|s| result << s}
|
||||||
|
assert_equal([S("a"), S(""), S("b"), S("c"), S("")], result)
|
||||||
|
|
||||||
|
result = []; "".split(//, 1) {|s| result << s}
|
||||||
|
assert_equal([], result)
|
||||||
ensure
|
ensure
|
||||||
$; = fs
|
$; = fs
|
||||||
end
|
end
|
||||||
@ -1762,6 +1801,7 @@ CODE
|
|||||||
s.split("b", 1).map(&:upcase!)
|
s.split("b", 1).map(&:upcase!)
|
||||||
assert_equal("abc", s)
|
assert_equal("abc", s)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_squeeze
|
def test_squeeze
|
||||||
assert_equal(S("abc"), S("aaabbbbccc").squeeze)
|
assert_equal(S("abc"), S("aaabbbbccc").squeeze)
|
||||||
assert_equal(S("aa bb cc"), S("aa bb cc").squeeze(S(" ")))
|
assert_equal(S("aa bb cc"), S("aa bb cc").squeeze(S(" ")))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user