* ext/strscan/strscan.c: add taint check.

* ext/strscan/strscan.c: #getch/#get_byte should set regexp registers.
* ext/strscan/strscan.c: remove useless #include directive.
* ext/strscan/strscan.c: refactor struct strscanner.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2298 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
aamine 2002-03-28 08:53:24 +00:00
parent e1eee80893
commit ff95039936
2 changed files with 203 additions and 144 deletions

View File

@ -1,3 +1,14 @@
Thu Mar 28 18:03:51 2002 Minero Aoki <aamine@loveruby.net>
* ext/strscan/strscan.c: add taint check.
* ext/strscan/strscan.c: #getch/#get_byte should set regexp
registers.
* ext/strscan/strscan.c: remove useless #include directive.
* ext/strscan/strscan.c: refactor struct strscanner.
Wed Mar 27 14:47:32 2002 WATANABE Hirofumi <eban@ruby-lang.org> Wed Mar 27 14:47:32 2002 WATANABE Hirofumi <eban@ruby-lang.org>
* io.c (READ_DATA_PENDING): configure.in has supported for uClibc, * io.c (READ_DATA_PENDING): configure.in has supported for uClibc,

View File

@ -26,37 +26,66 @@
struct strscanner struct strscanner
{ {
/* multi-purpose flags */
unsigned long flags; unsigned long flags;
/* the string to scan */
VALUE str; VALUE str;
long end2;
long beg1; /* scan pointers */
long end1; long prev; /* legal only when MATCHED_P(s) */
long idx; long curr; /* always legal */
/* the regexp registers; legal only when the last match succeeded */
struct re_registers regs; struct re_registers regs;
}; };
#define S_PTR(s) (RSTRING(s->str)->ptr) #define S_PTR(s) (RSTRING(s->str)->ptr)
#define S_LEN(s) (RSTRING(s->str)->len) #define S_LEN(s) (RSTRING(s->str)->len)
#define S_END(s) (S_PTR(s) + S_LEN(s)) #define S_END(s) (S_PTR(s) + S_LEN(s))
#define CURPTR(s) (S_PTR(s) + s->idx) #define CURPTR(s) (S_PTR(s) + s->curr)
#define S_RESTLEN(s) (S_LEN(s) - s->curr)
#define MATCH_FLAG (1UL) #define FLAG_MATCHED (1UL)
#define CLEAR_MATCH_STATUS(s) s->flags &= ~MATCH_FLAG
#define MATCHED(s,i,len) do {\ #define CLEAR_MATCH_STATUS(s) s->flags &= ~FLAG_MATCHED
s->flags |= MATCH_FLAG; \ #define MATCHED(s) s->flags |= FLAG_MATCHED
s->end2 = s->idx; \ #define MATCHED_P(s) (s->flags & FLAG_MATCHED)
s->beg1 = i; \
s->end1 = i + len; \
} while (0)
#define MATCHED_P(s) (s->flags & MATCH_FLAG)
#define GET_SCANNER(obj,var) Data_Get_Struct(obj, struct strscanner, var) #define GET_SCANNER(obj,var) Data_Get_Struct(obj, struct strscanner, var)
#define SCAN_FINISHED(s) ((s)->idx >= RSTRING(p->str)->len) #define SCAN_FINISHED(s) ((s)->curr >= RSTRING(p->str)->len)
static VALUE StringScanner; static VALUE StringScanner;
static VALUE ScanError; static VALUE ScanError;
/* ------------------------------------------------------------- */
/*
 * Apply OBJ_INFECT to `str' using the scanner's source string and hand
 * `str' back, so the call can wrap an expression that builds a result.
 * NOTE(review): presumably this copies the taint mark of p->str onto
 * `str' (the commit log says "add taint check") -- confirm in ruby.h.
 */
static VALUE
infect(str, p)
    VALUE str;
    struct strscanner *p;
{
    VALUE source = p->str;

    OBJ_INFECT(str, source);
    return str;
}
/*
 * Build a new string from the bytes of the scanned string lying between
 * the offsets beg_i (inclusive) and end_i (exclusive), then pass it
 * through infect().  No bounds checking is done here; callers must
 * supply offsets that lie inside the scanned string.
 */
static VALUE
extract_range(p, beg_i, end_i)
    struct strscanner *p;
    long beg_i, end_i;
{
    char *start = S_PTR(p) + beg_i;
    long nbytes = end_i - beg_i;
    VALUE piece = rb_str_new(start, nbytes);

    return infect(piece, p);
}
/*
 * Build a new string from `len' bytes of the scanned string starting at
 * offset beg_i, then pass it through infect().  No bounds checking is
 * done here; callers must supply a range inside the scanned string.
 */
static VALUE
extract_beg_len(p, beg_i, len)
    struct strscanner *p;
    long beg_i, len;
{
    char *start = S_PTR(p) + beg_i;
    VALUE piece = rb_str_new(start, len);

    return infect(piece, p);
}
/* ------------------------------------------------------------- */ /* ------------------------------------------------------------- */
static VALUE static VALUE
@ -113,7 +142,7 @@ strscan_reset(self)
struct strscanner *p; struct strscanner *p;
GET_SCANNER(self, p); GET_SCANNER(self, p);
p->idx = 0; p->curr = 0;
CLEAR_MATCH_STATUS(p); CLEAR_MATCH_STATUS(p);
return self; return self;
} }
@ -126,7 +155,7 @@ strscan_terminate(self)
struct strscanner *p; struct strscanner *p;
GET_SCANNER(self, p); GET_SCANNER(self, p);
p->idx = S_LEN(p); p->curr = S_LEN(p);
CLEAR_MATCH_STATUS(p); CLEAR_MATCH_STATUS(p);
return self; return self;
} }
@ -151,7 +180,7 @@ strscan_set_string(self, str)
Check_Type(str, T_STRING); Check_Type(str, T_STRING);
p->str = rb_str_dup(str); p->str = rb_str_dup(str);
rb_obj_freeze(p->str); rb_obj_freeze(p->str);
p->idx = 0; p->curr = 0;
CLEAR_MATCH_STATUS(p); CLEAR_MATCH_STATUS(p);
return str; return str;
} }
@ -163,7 +192,7 @@ strscan_get_pos(self)
struct strscanner *p; struct strscanner *p;
GET_SCANNER(self, p); GET_SCANNER(self, p);
return INT2FIX(p->idx); return INT2FIX(p->curr);
} }
static VALUE static VALUE
@ -178,11 +207,13 @@ strscan_set_pos(self, v)
if (i < 0) i += S_LEN(p); if (i < 0) i += S_LEN(p);
if (i < 0) rb_raise(rb_eRangeError, "index out of range"); if (i < 0) rb_raise(rb_eRangeError, "index out of range");
if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range"); if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
p->idx = i; p->curr = i;
return INT2FIX(i); return INT2FIX(i);
} }
/* Should I implement this function? */
#define strscan_prepare_re(re) /* none */
static VALUE static VALUE
strscan_do_scan(self, regex, succptr, getstr, headonly) strscan_do_scan(self, regex, succptr, getstr, headonly)
@ -191,52 +222,43 @@ strscan_do_scan(self, regex, succptr, getstr, headonly)
{ {
struct strscanner *p; struct strscanner *p;
int ret; int ret;
char *pbeg;
long plen;
Check_Type(regex, T_REGEXP); Check_Type(regex, T_REGEXP);
GET_SCANNER(self, p); GET_SCANNER(self, p);
pbeg = CURPTR(p);
plen = S_LEN(p) - p->idx;
CLEAR_MATCH_STATUS(p); CLEAR_MATCH_STATUS(p);
strscan_prepare_re(regex);
if (headonly) { if (headonly) {
ret = re_match(RREGEXP(regex)->ptr, ret = re_match(RREGEXP(regex)->ptr,
pbeg, plen, CURPTR(p), S_RESTLEN(p),
0, 0,
&(p->regs)); &(p->regs));
} }
else { else {
ret = re_search(RREGEXP(regex)->ptr, ret = re_search(RREGEXP(regex)->ptr,
pbeg, plen, CURPTR(p), S_RESTLEN(p),
0, 0,
plen, S_RESTLEN(p),
&(p->regs)); &(p->regs));
} }
if (ret == -2) { if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
rb_raise(ScanError, "regexp buffer overflow"); if (ret < 0) {
return Qnil; /* not reach */
}
else if (ret < 0) {
/* not matched */ /* not matched */
return Qnil; return Qnil;
} }
else {
/* matched */ MATCHED(p);
MATCHED(p, p->idx + p->regs.beg[0], p->prev = p->curr;
p->regs.end[0] - p->regs.beg[0]);
if (succptr) { if (succptr) {
p->idx += p->regs.end[0]; p->curr += p->regs.end[0];
} }
if (getstr) { if (getstr) {
return rb_str_new(pbeg, p->regs.end[0]); return extract_beg_len(p, p->prev, p->regs.end[0]);
} }
else { else {
return INT2FIX(p->regs.end[0]); return INT2FIX(p->regs.end[0]);
} }
}
} }
static VALUE static VALUE
@ -308,6 +330,20 @@ strscan_search_full(self, re, s, f)
return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0); return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
} }
/* DANGEROUS; need to synchronize with regex.c */
/*
 * Fill in the scanner's regexp registers by hand so that they describe
 * a single match (register 0 only) covering the bytes between p->prev
 * and p->curr.  The register offsets are stored relative to p->prev,
 * so the match begins at 0 and ends at (p->curr - p->prev).
 *
 * The caller must have set p->prev and p->curr before calling, as
 * strscan_getch and strscan_get_byte do.
 *
 * NOTE(review): this writes the fields of struct re_registers directly
 * instead of going through the regexp engine; presumably it has to
 * mirror how regex.c allocates and sizes these arrays -- confirm
 * whenever regex.c changes.
 */
static void
adjust_registers_to_matched(p)
struct strscanner *p;
{
/* no register arrays yet: allocate room for RE_NREGS entries each */
if (p->regs.allocated == 0) {
p->regs.beg = ALLOC_N(int, RE_NREGS);
p->regs.end = ALLOC_N(int, RE_NREGS);
p->regs.allocated = RE_NREGS;
}
/* only register 0 (the whole match) is valid after this call */
p->regs.num_regs = 1;
p->regs.beg[0] = 0;
p->regs.end[0] = p->curr - p->prev;
}
static VALUE static VALUE
strscan_getch(self) strscan_getch(self)
@ -322,11 +358,14 @@ strscan_getch(self)
return Qnil; return Qnil;
len = mbclen(*CURPTR(p)); len = mbclen(*CURPTR(p));
if (p->idx + len > S_LEN(p)) if (p->curr + len > S_LEN(p))
len = S_LEN(p) - p->idx; len = S_LEN(p) - p->curr;
MATCHED(p, p->idx, len); p->prev = p->curr;
p->idx += len; p->curr += len;
return rb_str_new(S_PTR(p) + p->beg1, p->end1 - p->beg1); MATCHED(p);
adjust_registers_to_matched(p);
return extract_range(p, p->prev + p->regs.beg[0],
p->prev + p->regs.end[0]);
} }
static VALUE static VALUE
@ -340,9 +379,12 @@ strscan_get_byte(self)
if (SCAN_FINISHED(p)) if (SCAN_FINISHED(p))
return Qnil; return Qnil;
MATCHED(p, p->idx, 1); p->prev = p->curr;
p->idx++; p->curr++;
return rb_str_new(S_PTR(p) + p->beg1, 1); MATCHED(p);
adjust_registers_to_matched(p);
return extract_range(p, p->prev + p->regs.beg[0],
p->prev + p->regs.end[0]);
} }
@ -356,14 +398,28 @@ strscan_peek(self, vlen)
GET_SCANNER(self, p); GET_SCANNER(self, p);
len = NUM2LONG(vlen); len = NUM2LONG(vlen);
if (SCAN_FINISHED(p)) { if (SCAN_FINISHED(p))
return rb_str_new("", 0); return infect(rb_str_new("", 0), p);
}
else { if (p->curr + len > S_LEN(p))
if (p->idx + len > S_LEN(p)) len = S_LEN(p) - p->curr;
len = S_LEN(p) - p->idx; return extract_beg_len(p, p->curr, len);
return rb_str_new(CURPTR(p), len); }
}
static VALUE
strscan_unscan(self)
VALUE self;
{
struct strscanner *p;
GET_SCANNER(self, p);
if (! MATCHED_P(p))
rb_raise(ScanError, "cannot unscan: prev match had failed");
p->curr = p->prev;
CLEAR_MATCH_STATUS(p);
return self;
} }
@ -393,36 +449,6 @@ strscan_rest_p(self)
return Qtrue; return Qtrue;
} }
/*
 * Return a new string holding everything from the scan pointer to the
 * end of the scanned string, or a new empty string once the scan has
 * finished.
 */
static VALUE
strscan_rest(self)
    VALUE self;
{
    struct strscanner *p;
    long remaining;

    GET_SCANNER(self, p);
    if (! SCAN_FINISHED(p)) {
        remaining = S_LEN(p) - p->idx;
        return rb_str_new(CURPTR(p), remaining);
    }
    return rb_str_new("", 0);
}
/*
 * Return, as a Fixnum, the number of bytes between the scan pointer and
 * the end of the scanned string; 0 once the scan has finished.
 */
static VALUE
strscan_rest_size(self)
    VALUE self;
{
    struct strscanner *p;
    long remaining = 0;

    GET_SCANNER(self, p);
    if (! SCAN_FINISHED(p))
        remaining = S_LEN(p) - p->idx;
    return INT2FIX(remaining);
}
static VALUE static VALUE
strscan_matched_p(self) strscan_matched_p(self)
@ -445,7 +471,9 @@ strscan_matched(self)
GET_SCANNER(self, p); GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil; if (! MATCHED_P(p)) return Qnil;
return rb_str_new(S_PTR(p) + p->beg1, p->end1 - p->beg1);
return extract_range(p, p->prev + p->regs.beg[0],
p->prev + p->regs.end[0]);
} }
static VALUE static VALUE
@ -457,46 +485,7 @@ strscan_matched_size(self)
GET_SCANNER(self, p); GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil; if (! MATCHED_P(p)) return Qnil;
return INT2NUM(p->end1 - p->beg1); return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
}
/*
 * Move the scan pointer back to where it was before the last match
 * (p->end2) and clear the match status.  Raises ScanError when the
 * previous match did not succeed.  Returns self.
 */
static VALUE
strscan_unscan(self)
    VALUE self;
{
    struct strscanner *p;

    GET_SCANNER(self, p);
    if (MATCHED_P(p)) {
        p->idx = p->end2;
        CLEAR_MATCH_STATUS(p);
        return self;
    }
    rb_raise(ScanError, "cannot unscan: prev match had failed");
    return self; /* not reached */
}
/*
 * Return a new string holding the bytes between the pre-match scan
 * pointer (p->end2) and the start of the last match (p->beg1), or nil
 * when the last match did not succeed.
 */
static VALUE
strscan_pre_match(self)
    VALUE self;
{
    struct strscanner *p;
    long from, len;

    GET_SCANNER(self, p);
    if (! MATCHED_P(p)) {
        return Qnil;
    }
    from = p->end2;
    len = p->beg1 - from;
    return rb_str_new(S_PTR(p) + from, len);
}
static VALUE
strscan_post_match(self)
VALUE self;
{
struct strscanner *p;
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
return rb_str_new(S_PTR(p) + p->end1, S_LEN(p) - p->end1);
} }
static VALUE static VALUE
@ -510,17 +499,73 @@ strscan_aref(self, idx)
if (! MATCHED_P(p)) return Qnil; if (! MATCHED_P(p)) return Qnil;
i = NUM2LONG(idx); i = NUM2LONG(idx);
if (i < 0)
i += p->regs.num_regs;
if (i < 0) return Qnil; if (i < 0) return Qnil;
if (i >= p->regs.num_regs) return Qnil; if (i >= p->regs.num_regs) return Qnil;
if (p->regs.beg[i] == -1) return Qnil; if (p->regs.beg[i] == -1) return Qnil;
return rb_str_new(S_PTR(p) + p->end2 + p->regs.beg[i], return extract_range(p, p->prev + p->regs.beg[i],
p->regs.end[i] - p->regs.beg[i]); p->prev + p->regs.end[i]);
}
/*
 * Return the part of the scanned string that precedes the last match,
 * i.e. the bytes from offset 0 up to where the match began, or nil
 * when the last match did not succeed.
 */
static VALUE
strscan_pre_match(self)
    VALUE self;
{
    struct strscanner *p;
    long match_beg;

    GET_SCANNER(self, p);
    if (! MATCHED_P(p)) {
        return Qnil;
    }
    /* register offsets are relative to the pre-match scan pointer */
    match_beg = p->prev + p->regs.beg[0];
    return extract_range(p, 0, match_beg);
}
/*
 * Return the part of the scanned string that follows the last match,
 * i.e. the bytes from where the match ended up to the end of the
 * string, or nil when the last match did not succeed.
 */
static VALUE
strscan_post_match(self)
    VALUE self;
{
    struct strscanner *p;
    long match_end;

    GET_SCANNER(self, p);
    if (! MATCHED_P(p)) {
        return Qnil;
    }
    /* register offsets are relative to the pre-match scan pointer */
    match_end = p->prev + p->regs.end[0];
    return extract_range(p, match_end, S_LEN(p));
}
/*
 * Return a new string holding everything from the scan pointer to the
 * end of the scanned string, or a new empty string once the scan has
 * finished.  Either result is passed through infect().
 */
static VALUE
strscan_rest(self)
    VALUE self;
{
    struct strscanner *p;

    GET_SCANNER(self, p);
    if (! SCAN_FINISHED(p)) {
        return extract_range(p, p->curr, S_LEN(p));
    }
    return infect(rb_str_new("", 0), p);
}
static VALUE
strscan_rest_size(self)
VALUE self;
{
struct strscanner *p;
long i;
GET_SCANNER(self, p);
if (SCAN_FINISHED(p)) {
return INT2FIX(0);
}
i = S_LEN(p) - p->curr;
return INT2FIX(i);
} }
static void static void
cat_i_char(ret, c) catchar(ret, c)
VALUE ret; VALUE ret;
int c; int c;
{ {
@ -544,7 +589,7 @@ strscan_inspect(self)
GET_SCANNER(self, p); GET_SCANNER(self, p);
len = sprintf(buf, "#<%s %ld/%ld", len = sprintf(buf, "#<%s %ld/%ld",
rb_class2name(CLASS_OF(self)), rb_class2name(CLASS_OF(self)),
p->idx, S_LEN(p)); p->curr, S_LEN(p));
ret = rb_str_new(buf, len); ret = rb_str_new(buf, len);
if (SCAN_FINISHED(p)) { if (SCAN_FINISHED(p)) {
@ -560,7 +605,7 @@ strscan_inspect(self)
if (sp > S_PTR(p)) if (sp > S_PTR(p))
rb_str_cat(ret, "...", 3); rb_str_cat(ret, "...", 3);
for (; sp < CURPTR(p); sp++) { for (; sp < CURPTR(p); sp++) {
cat_i_char(ret, *sp); catchar(ret, *sp);
} }
rb_str_cat(ret, "\"", 1); rb_str_cat(ret, "\"", 1);
} }
@ -572,7 +617,7 @@ strscan_inspect(self)
if (e > S_END(p)) e = S_END(p); if (e > S_END(p)) e = S_END(p);
rb_str_cat(ret, " \"", 2); rb_str_cat(ret, " \"", 2);
for (; sp < e; sp++) { for (; sp < e; sp++) {
cat_i_char(ret, *sp); catchar(ret, *sp);
} }
if (sp < S_END(p)) if (sp < S_END(p))
rb_str_cat(ret, "...", 3); rb_str_cat(ret, "...", 3);
@ -580,9 +625,10 @@ strscan_inspect(self)
} }
rb_str_cat(ret, ">", 1); rb_str_cat(ret, ">", 1);
} }
return ret; return infect(ret, p);
} }
/* ------------------------------------------------------------- */
void void
Init_strscan() Init_strscan()
@ -636,21 +682,23 @@ Init_strscan()
rb_define_method(StringScanner, "peek", strscan_peek, 1); rb_define_method(StringScanner, "peek", strscan_peek, 1);
rb_define_method(StringScanner, "peep", strscan_peek, 1); rb_define_method(StringScanner, "peep", strscan_peek, 1);
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
rb_define_method(StringScanner, "empty?", strscan_eos_p, 0); rb_define_method(StringScanner, "empty?", strscan_eos_p, 0);
rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
rb_define_method(StringScanner, "rest", strscan_rest, 0);
rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
rb_define_method(StringScanner, "restsize", strscan_rest_size, 0);
rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); rb_define_method(StringScanner, "matched?", strscan_matched_p, 0);
rb_define_method(StringScanner, "matched", strscan_matched, 0); rb_define_method(StringScanner, "matched", strscan_matched, 0);
rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0); rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
rb_define_method(StringScanner, "matchedsize", strscan_matched_size, 0); rb_define_method(StringScanner, "matchedsize", strscan_matched_size, 0);
rb_define_method(StringScanner, "unscan", strscan_unscan, 0); rb_define_method(StringScanner, "[]", strscan_aref, 1);
rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0); rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
rb_define_method(StringScanner, "post_match", strscan_post_match, 0); rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
rb_define_method(StringScanner, "[]", strscan_aref, 1);
rb_define_method(StringScanner, "rest", strscan_rest, 0);
rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
rb_define_method(StringScanner, "restsize", strscan_rest_size, 0);
rb_define_method(StringScanner, "inspect", strscan_inspect, 0); rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
} }