* string.c (tr_setup_table): use C array for characters that fit
  in a byte to gain performance.

* string.c (rb_str_delete_bang): ditto.

* string.c (rb_str_squeeze_bang): ditto.

* string.c (rb_str_count): ditto.

* string.c (tr_trans): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13812 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2007-11-03 19:04:53 +00:00
parent e920777bb5
commit 19c4d26c51
4 changed files with 124 additions and 42 deletions

View File

@ -1,3 +1,16 @@
Sun Nov 4 03:58:32 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
* string.c (tr_setup_table): use C array for characters that fit
in a byte to gain performance.
* string.c (rb_str_delete_bang): ditto.
* string.c (rb_str_squeeze_bang): ditto.
* string.c (rb_str_count): ditto.
* string.c (tr_trans): ditto.
Sun Nov 4 00:06:40 2007 Tanaka Akira <akr@fsij.org> Sun Nov 4 00:06:40 2007 Tanaka Akira <akr@fsij.org>
* gc.c (count_objects): ObjectSpace.count_objects implemented. * gc.c (count_objects): ObjectSpace.count_objects implemented.

View File

@ -261,7 +261,7 @@ rb_enc_find(const char *name)
return rb_enc_from_index(idx); return rb_enc_from_index(idx);
} }
static int static inline int
enc_capable(VALUE obj) enc_capable(VALUE obj)
{ {
if (IMMEDIATE_P(obj)) return Qfalse; if (IMMEDIATE_P(obj)) return Qfalse;

123
string.c
View File

@ -3352,12 +3352,13 @@ static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
static VALUE static VALUE
tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
{ {
SIGNED_VALUE trans[256];
rb_encoding *enc; rb_encoding *enc;
struct tr trsrc, trrepl; struct tr trsrc, trrepl;
int cflag = 0; int cflag = 0;
int c, last = 0, modify = 0; int c, last = 0, modify = 0, i;
char *s, *send; char *s, *send;
VALUE hash; VALUE hash = 0;
StringValue(src); StringValue(src);
StringValue(repl); StringValue(repl);
@ -3379,25 +3380,47 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
trsrc.gen = trrepl.gen = 0; trsrc.gen = trrepl.gen = 0;
trsrc.now = trrepl.now = 0; trsrc.now = trrepl.now = 0;
trsrc.max = trrepl.max = 0; trsrc.max = trrepl.max = 0;
hash = rb_hash_new();
if (cflag) { if (cflag) {
for (i=0; i<256; i++) {
trans[i] = 1;
}
while ((c = trnext(&trsrc, enc)) >= 0) { while ((c = trnext(&trsrc, enc)) >= 0) {
if (c < 256) {
trans[c] = -1;
}
else {
if (!hash) hash = rb_hash_new();
rb_hash_aset(hash, INT2NUM(c), Qtrue); rb_hash_aset(hash, INT2NUM(c), Qtrue);
} }
}
while ((c = trnext(&trrepl, enc)) >= 0) while ((c = trnext(&trrepl, enc)) >= 0)
/* retrieve last replacer */; /* retrieve last replacer */;
last = trrepl.now; last = trrepl.now;
for (i=0; i<256; i++) {
if (trans[i] >= 0) {
trans[i] = last;
}
}
} }
else { else {
int r; int r;
for (i=0; i<256; i++) {
trans[i] = -1;
}
while ((c = trnext(&trsrc, enc)) >= 0) { while ((c = trnext(&trsrc, enc)) >= 0) {
r = trnext(&trrepl, enc); r = trnext(&trrepl, enc);
if (r == -1) r = trrepl.now; if (r == -1) r = trrepl.now;
if (c < 256) {
trans[c] = INT2NUM(r);
}
else {
if (!hash) hash = rb_hash_new();
rb_hash_aset(hash, INT2NUM(c), INT2NUM(r)); rb_hash_aset(hash, INT2NUM(c), INT2NUM(r));
} }
} }
}
rb_str_modify(str); rb_str_modify(str);
s = RSTRING_PTR(str); send = RSTRING_END(str); s = RSTRING_PTR(str); send = RSTRING_END(str);
@ -3413,7 +3436,12 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
tlen = clen = rb_enc_codelen(c, enc); tlen = clen = rb_enc_codelen(c, enc);
s += clen; s += clen;
if (c < 256) {
v = trans[c] >= 0 ? trans[c] : Qnil;
}
else {
v = rb_hash_aref(hash, INT2NUM(c)); v = rb_hash_aref(hash, INT2NUM(c));
}
if (!NIL_P(v)) { if (!NIL_P(v)) {
if (!cflag) { if (!cflag) {
c = NUM2INT(v); c = NUM2INT(v);
@ -3447,11 +3475,11 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
} }
else if (rb_enc_mbmaxlen(enc) == 1) { else if (rb_enc_mbmaxlen(enc) == 1) {
while (s < send) { while (s < send) {
VALUE v = rb_hash_aref(hash, INT2FIX(*s)); c = *s;
if (!NIL_P(v)) { if (trans[c] >= 0) {
if (!cflag) { if (!cflag) {
c = FIX2INT(v); c = FIX2INT(trans[c]);
*s = c & 0xff; *s = c;
modify = 1; modify = 1;
} }
} }
@ -3473,7 +3501,12 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint(s, send, enc);
tlen = clen = rb_enc_codelen(c, enc); tlen = clen = rb_enc_codelen(c, enc);
if (c < 256) {
v = trans[c] >= 0 ? trans[c] : Qnil;
}
else {
v = rb_hash_aref(hash, INT2NUM(c)); v = rb_hash_aref(hash, INT2NUM(c));
}
if (!NIL_P(v)) { if (!NIL_P(v)) {
if (!cflag) { if (!cflag) {
c = NUM2INT(v); c = NUM2INT(v);
@ -3552,17 +3585,39 @@ rb_str_tr(VALUE str, VALUE src, VALUE repl)
} }
static void static void
tr_setup_table(VALUE str, VALUE *tablep, VALUE *ctablep, rb_encoding *enc) tr_setup_table(VALUE str, char stable[256],
VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
{ {
char buf[256];
struct tr tr; struct tr tr;
int c; int c;
VALUE table, ptable; VALUE table = 0, ptable;
int i, cflag = 0;
tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str); tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
tr.gen = tr.now = tr.max = 0; tr.gen = tr.now = tr.max = 0;
table = rb_hash_new();
if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') { if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') {
cflag = 1;
tr.p++; tr.p++;
}
for (i=0; i<256; i++) {
stable[i] = 1;
}
for (i=0; i<256; i++) {
buf[i] = cflag;
}
while ((c = trnext(&tr, enc)) >= 0) {
if (c < 256) {
buf[c & 0xff] = !cflag;
}
else {
VALUE key = INT2NUM(c);
if (!table) {
table = rb_hash_new();
if (cflag) {
ptable = *ctablep; ptable = *ctablep;
*ctablep = table; *ctablep = table;
} }
@ -3570,17 +3625,35 @@ tr_setup_table(VALUE str, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
ptable = *tablep; ptable = *tablep;
*tablep = table; *tablep = table;
} }
}
while ((c = trnext(&tr, enc)) >= 0) {
VALUE key = INT2NUM(c);
if (!ptable || !NIL_P(rb_hash_aref(ptable, key))) { if (!ptable || !NIL_P(rb_hash_aref(ptable, key))) {
rb_hash_aset(table, key, Qtrue); rb_hash_aset(table, key, Qtrue);
} }
} }
} }
for (i=0; i<256; i++) {
stable[i] = stable[i] && buf[i];
}
}
/*
 * Decide whether code point c is matched by the lookup tables.
 *
 *   c      code point to test
 *   table  256-entry byte table, used directly when c < 256
 *   del    hash consulted for c >= 256; 0 means "no hash"
 *   nodel  hash consulted for c >= 256; 0 means "no hash"
 *
 * For c < 256 the answer is simply whether table[c] is non-zero.
 * For larger c the result is true only when del is set and holds a
 * non-nil entry for c, and nodel is either unset or has no entry for c.
 *
 * Returns Qtrue on a match and Qfalse otherwise.
 *
 * NOTE(review): c is used as an array index without a lower-bound
 * check; presumably callers never pass a negative value -- confirm.
 */
static int
tr_find(int c, char table[256], VALUE del, VALUE nodel)
{
    VALUE key;

    /* Small code points are answered straight from the byte table. */
    if (c < 256)
        return table[c] ? Qtrue : Qfalse;

    key = INT2NUM(c);

    /* Without a del hash, or without an entry in it, nothing matches. */
    if (!del || NIL_P(rb_hash_aref(del, key)))
        return Qfalse;

    /* An entry in nodel vetoes the match. */
    if (nodel && !NIL_P(rb_hash_aref(nodel, key)))
        return Qfalse;

    return Qtrue;
}
/* /*
* call-seq: * call-seq:
* str.delete!([other_str]+) => str or nil * str.delete!([other_str]+) => str or nil
@ -3592,6 +3665,7 @@ tr_setup_table(VALUE str, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
static VALUE static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str) rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{ {
char squeez[256];
rb_encoding *enc = 0; rb_encoding *enc = 0;
char *s, *send, *t; char *s, *send, *t;
VALUE del = 0, nodel = 0; VALUE del = 0, nodel = 0;
@ -3606,7 +3680,7 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
StringValue(s); StringValue(s);
enc = rb_enc_check(str, s); enc = rb_enc_check(str, s);
tr_setup_table(s, &del, &nodel, enc); tr_setup_table(s, squeez, &del, &nodel, enc);
} }
rb_str_modify(str); rb_str_modify(str);
@ -3616,10 +3690,8 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
while (s < send) { while (s < send) {
int c = rb_enc_codepoint(s, send, enc); int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc); int clen = rb_enc_codelen(c, enc);
VALUE v = INT2NUM(c);
if ((del && !NIL_P(rb_hash_aref(del, v))) && if (tr_find(c, squeez, del, nodel)) {
(!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
modify = 1; modify = 1;
} }
else { else {
@ -3670,6 +3742,7 @@ rb_str_delete(int argc, VALUE *argv, VALUE str)
static VALUE static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{ {
char squeez[256];
rb_encoding *enc = 0; rb_encoding *enc = 0;
VALUE del = 0, nodel = 0; VALUE del = 0, nodel = 0;
char *s, *send, *t; char *s, *send, *t;
@ -3685,7 +3758,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
StringValue(s); StringValue(s);
enc = rb_enc_check(str, s); enc = rb_enc_check(str, s);
tr_setup_table(s, &del, &nodel, enc); tr_setup_table(s, squeez, &del, &nodel, enc);
} }
} }
@ -3697,11 +3770,8 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
while (s < send) { while (s < send) {
int c = rb_enc_codepoint(s, send, enc); int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc); int clen = rb_enc_codelen(c, enc);
VALUE v = INT2NUM(c);
if (c != save || if (c != save || !tr_find(c, squeez, del, nodel)) {
((del && NIL_P(rb_hash_aref(del, v))) &&
(!nodel || NIL_P(rb_hash_aref(nodel, v))))) {
if (t != s) rb_enc_mbcput(c, t, enc); if (t != s) rb_enc_mbcput(c, t, enc);
save = c; save = c;
t += clen; t += clen;
@ -3799,6 +3869,7 @@ rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
static VALUE static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str) rb_str_count(int argc, VALUE *argv, VALUE str)
{ {
char table[256];
rb_encoding *enc = 0; rb_encoding *enc = 0;
VALUE del = 0, nodel = 0; VALUE del = 0, nodel = 0;
char *s, *send; char *s, *send;
@ -3812,7 +3883,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
StringValue(s); StringValue(s);
enc = rb_enc_check(str, s); enc = rb_enc_check(str, s);
tr_setup_table(s, &del, &nodel, enc); tr_setup_table(s, table, &del, &nodel, enc);
} }
s = RSTRING_PTR(str); s = RSTRING_PTR(str);
@ -3822,10 +3893,8 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
while (s < send) { while (s < send) {
int c = rb_enc_codepoint(s, send, enc); int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc); int clen = rb_enc_codelen(c, enc);
VALUE v = INT2NUM(c);
if ((del && !NIL_P(rb_hash_aref(del, v))) && if (tr_find(c, table, del, nodel)) {
(!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
i++; i++;
} }
s += clen; s += clen;

View File

@ -1,7 +1,7 @@
#define RUBY_VERSION "1.9.0" #define RUBY_VERSION "1.9.0"
#define RUBY_RELEASE_DATE "2007-11-03" #define RUBY_RELEASE_DATE "2007-11-04"
#define RUBY_VERSION_CODE 190 #define RUBY_VERSION_CODE 190
#define RUBY_RELEASE_CODE 20071103 #define RUBY_RELEASE_CODE 20071104
#define RUBY_PATCHLEVEL 0 #define RUBY_PATCHLEVEL 0
#define RUBY_VERSION_MAJOR 1 #define RUBY_VERSION_MAJOR 1
@ -9,7 +9,7 @@
#define RUBY_VERSION_TEENY 0 #define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2007 #define RUBY_RELEASE_YEAR 2007
#define RUBY_RELEASE_MONTH 11 #define RUBY_RELEASE_MONTH 11
#define RUBY_RELEASE_DAY 3 #define RUBY_RELEASE_DAY 4
#ifdef RUBY_EXTERN #ifdef RUBY_EXTERN
RUBY_EXTERN const char ruby_version[]; RUBY_EXTERN const char ruby_version[];