* regparse.c, oniguruma.h: imported Oni Guruma 2.2.6.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6096 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
14b8903137
commit
62a7fde33b
@ -1,3 +1,7 @@
|
|||||||
|
Mon Apr 5 19:39:10 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
|
||||||
|
|
||||||
|
* regparse.c, oniguruma.h: imported Oni Guruma 2.2.6.
|
||||||
|
|
||||||
Mon Apr 5 12:12:09 2004 NAKAMURA Usaku <usa@ruby-lang.org>
|
Mon Apr 5 12:12:09 2004 NAKAMURA Usaku <usa@ruby-lang.org>
|
||||||
|
|
||||||
* ext/socket/socket.c (raise_socket_error): some platforms don't have
|
* ext/socket/socket.c (raise_socket_error): some platforms don't have
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
#define ONIGURUMA
|
#define ONIGURUMA
|
||||||
#define ONIGURUMA_VERSION_MAJOR 2
|
#define ONIGURUMA_VERSION_MAJOR 2
|
||||||
#define ONIGURUMA_VERSION_MINOR 2
|
#define ONIGURUMA_VERSION_MINOR 2
|
||||||
#define ONIGURUMA_VERSION_TEENY 5
|
#define ONIGURUMA_VERSION_TEENY 6
|
||||||
|
|
||||||
#ifndef P_
|
#ifndef P_
|
||||||
#if defined(__STDC__) || defined(_WIN32)
|
#if defined(__STDC__) || defined(_WIN32)
|
||||||
|
171
regparse.c
171
regparse.c
@ -2020,26 +2020,29 @@ popular_qualifier_num(QualifierNode* qf)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum ReduceType {
|
||||||
|
RQ_ASIS = 0, /* as is */
|
||||||
|
RQ_DEL = 1, /* delete parent */
|
||||||
|
RQ_A, /* to '*' */
|
||||||
|
RQ_AQ, /* to '*?' */
|
||||||
|
RQ_QQ, /* to '??' */
|
||||||
|
RQ_P_QQ, /* to '+)??' */
|
||||||
|
RQ_PQ_Q, /* to '+?)?' */
|
||||||
|
};
|
||||||
|
|
||||||
|
static enum ReduceType ReduceTypeTable[6][6] = {
|
||||||
|
{RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
|
||||||
|
{RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
|
||||||
|
{RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
|
||||||
|
{RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
|
||||||
|
{RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
|
||||||
|
{RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
extern void
|
extern void
|
||||||
onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
|
onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
|
||||||
{
|
{
|
||||||
#define NQ_ASIS 0 /* as is */
|
|
||||||
#define NQ_DEL 1 /* delete parent */
|
|
||||||
#define NQ_A 2 /* to '*' */
|
|
||||||
#define NQ_AQ 3 /* to '*?' */
|
|
||||||
#define NQ_QQ 4 /* to '??' */
|
|
||||||
#define NQ_P_QQ 5 /* to '+)??' */
|
|
||||||
#define NQ_PQ_Q 6 /* to '+?)?' */
|
|
||||||
|
|
||||||
static char reduces[][6] = {
|
|
||||||
{NQ_DEL, NQ_A, NQ_A, NQ_QQ, NQ_AQ, NQ_ASIS}, /* '?' */
|
|
||||||
{NQ_DEL, NQ_DEL, NQ_DEL, NQ_P_QQ, NQ_P_QQ, NQ_DEL}, /* '*' */
|
|
||||||
{NQ_A, NQ_A, NQ_DEL, NQ_ASIS, NQ_P_QQ, NQ_DEL}, /* '+' */
|
|
||||||
{NQ_DEL, NQ_AQ, NQ_AQ, NQ_DEL, NQ_AQ, NQ_AQ}, /* '??' */
|
|
||||||
{NQ_DEL, NQ_DEL, NQ_DEL, NQ_DEL, NQ_DEL, NQ_DEL}, /* '*?' */
|
|
||||||
{NQ_ASIS, NQ_PQ_Q, NQ_DEL, NQ_AQ, NQ_AQ, NQ_DEL} /* '+?' */
|
|
||||||
};
|
|
||||||
|
|
||||||
int pnum, cnum;
|
int pnum, cnum;
|
||||||
QualifierNode *p, *c;
|
QualifierNode *p, *c;
|
||||||
|
|
||||||
@ -2048,35 +2051,35 @@ onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
|
|||||||
pnum = popular_qualifier_num(p);
|
pnum = popular_qualifier_num(p);
|
||||||
cnum = popular_qualifier_num(c);
|
cnum = popular_qualifier_num(c);
|
||||||
|
|
||||||
switch(reduces[cnum][pnum]) {
|
switch(ReduceTypeTable[cnum][pnum]) {
|
||||||
case NQ_DEL:
|
case RQ_DEL:
|
||||||
*p = *c;
|
*p = *c;
|
||||||
break;
|
break;
|
||||||
case NQ_A:
|
case RQ_A:
|
||||||
p->target = c->target;
|
p->target = c->target;
|
||||||
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
|
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
|
||||||
break;
|
break;
|
||||||
case NQ_AQ:
|
case RQ_AQ:
|
||||||
p->target = c->target;
|
p->target = c->target;
|
||||||
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
|
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
|
||||||
break;
|
break;
|
||||||
case NQ_QQ:
|
case RQ_QQ:
|
||||||
p->target = c->target;
|
p->target = c->target;
|
||||||
p->lower = 0; p->upper = 1; p->greedy = 0;
|
p->lower = 0; p->upper = 1; p->greedy = 0;
|
||||||
break;
|
break;
|
||||||
case NQ_P_QQ:
|
case RQ_P_QQ:
|
||||||
p->target = cnode;
|
p->target = cnode;
|
||||||
p->lower = 0; p->upper = 1; p->greedy = 0;
|
p->lower = 0; p->upper = 1; p->greedy = 0;
|
||||||
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
|
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
|
||||||
return ;
|
return ;
|
||||||
break;
|
break;
|
||||||
case NQ_PQ_Q:
|
case RQ_PQ_Q:
|
||||||
p->target = cnode;
|
p->target = cnode;
|
||||||
p->lower = 0; p->upper = 1; p->greedy = 1;
|
p->lower = 0; p->upper = 1; p->greedy = 1;
|
||||||
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
|
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
|
||||||
return ;
|
return ;
|
||||||
break;
|
break;
|
||||||
case NQ_ASIS:
|
case RQ_ASIS:
|
||||||
p->target = cnode;
|
p->target = cnode;
|
||||||
return ;
|
return ;
|
||||||
break;
|
break;
|
||||||
@ -2312,6 +2315,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
|
|||||||
{
|
{
|
||||||
int r, len, is_num;
|
int r, len, is_num;
|
||||||
int c = 0;
|
int c = 0;
|
||||||
|
OnigCodePoint code, first_code;
|
||||||
UChar *name_end;
|
UChar *name_end;
|
||||||
UChar *p = *src;
|
UChar *p = *src;
|
||||||
|
|
||||||
@ -2322,17 +2326,22 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
|
|||||||
return ONIGERR_EMPTY_GROUP_NAME;
|
return ONIGERR_EMPTY_GROUP_NAME;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
first_code = ONIGENC_MBC_TO_CODE(env->enc, p, end);
|
||||||
PFETCH(c);
|
PFETCH(c);
|
||||||
if (c == '>')
|
if (c == '>')
|
||||||
return ONIGERR_EMPTY_GROUP_NAME;
|
return ONIGERR_EMPTY_GROUP_NAME;
|
||||||
|
|
||||||
if (ONIGENC_IS_CODE_DIGIT(env->enc, c)) {
|
if (ONIGENC_IS_CODE_DIGIT(env->enc, first_code)) {
|
||||||
if (ref == 1)
|
if (ref == 1)
|
||||||
is_num = 1;
|
is_num = 1;
|
||||||
else {
|
else {
|
||||||
r = ONIGERR_INVALID_GROUP_NAME;
|
r = ONIGERR_INVALID_GROUP_NAME;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (! ONIGENC_IS_CODE_WORD(env->enc, first_code)) {
|
||||||
|
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
len = enc_len(env->enc, c);
|
len = enc_len(env->enc, c);
|
||||||
while (!PEND && len-- > 1)
|
while (!PEND && len-- > 1)
|
||||||
PFETCH(c);
|
PFETCH(c);
|
||||||
@ -2340,26 +2349,28 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
|
|||||||
|
|
||||||
while (!PEND) {
|
while (!PEND) {
|
||||||
name_end = p;
|
name_end = p;
|
||||||
|
code = ONIGENC_MBC_TO_CODE(env->enc, p, end);
|
||||||
PFETCH(c);
|
PFETCH(c);
|
||||||
if (c == '>' || c == ')') break;
|
if (c == '>' || c == ')') break;
|
||||||
|
|
||||||
len = enc_len(env->enc, c);
|
len = enc_len(env->enc, c);
|
||||||
if (is_num == 1) {
|
if (is_num == 1) {
|
||||||
if (! ONIGENC_IS_CODE_DIGIT(env->enc, c)) {
|
if (len == 1) {
|
||||||
if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) && c != '_')
|
if (! ONIGENC_IS_CODE_DIGIT(env->enc, code)) {
|
||||||
|
if (!ONIGENC_IS_CODE_WORD(env->enc, code))
|
||||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||||
else
|
else
|
||||||
r = ONIGERR_INVALID_GROUP_NAME;
|
r = ONIGERR_INVALID_GROUP_NAME;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (len == 1) {
|
|
||||||
if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) &&
|
|
||||||
!ONIGENC_IS_CODE_DIGIT(env->enc, c) &&
|
|
||||||
c != '_') {
|
|
||||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
if (! ONIGENC_IS_CODE_WORD(env->enc, code)) {
|
||||||
|
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!PEND && len-- > 1)
|
while (!PEND && len-- > 1)
|
||||||
@ -2370,8 +2381,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
|
|||||||
name_end = end;
|
name_end = end;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
c = **src;
|
if (ONIGENC_IS_CODE_UPPER(env->enc, first_code))
|
||||||
if (ONIGENC_IS_CODE_UPPER(env->enc, c))
|
|
||||||
r = ONIGERR_INVALID_GROUP_NAME;
|
r = ONIGERR_INVALID_GROUP_NAME;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2391,19 +2401,24 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
|
|||||||
{
|
{
|
||||||
int r, len;
|
int r, len;
|
||||||
int c = 0;
|
int c = 0;
|
||||||
|
OnigCodePoint code;
|
||||||
UChar *name_end;
|
UChar *name_end;
|
||||||
UChar *p = *src;
|
UChar *p = *src;
|
||||||
|
|
||||||
r = 0;
|
r = 0;
|
||||||
while (!PEND) {
|
while (!PEND) {
|
||||||
name_end = p;
|
name_end = p;
|
||||||
|
code = ONIGENC_MBC_TO_CODE(env->enc, p, end);
|
||||||
|
len = enc_len(env->enc, c);
|
||||||
PFETCH(c);
|
PFETCH(c);
|
||||||
if (enc_len(env->enc, c) > 1)
|
if (len > 1)
|
||||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||||
|
|
||||||
if (c == '>' || c == ')') break;
|
if (c == '>' || c == ')') break;
|
||||||
if (! ONIGENC_IS_CODE_DIGIT(env->enc, c))
|
if (! ONIGENC_IS_CODE_DIGIT(env->enc, code))
|
||||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||||
|
|
||||||
|
p += (len - 1);
|
||||||
}
|
}
|
||||||
if (c != '>') {
|
if (c != '>') {
|
||||||
r = ONIGERR_INVALID_GROUP_NAME;
|
r = ONIGERR_INVALID_GROUP_NAME;
|
||||||
@ -3174,6 +3189,21 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case '(':
|
case '(':
|
||||||
|
if (PPEEK == '?' &&
|
||||||
|
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
|
||||||
|
PINC;
|
||||||
|
if (PPEEK == '#') {
|
||||||
|
PFETCH(c);
|
||||||
|
while (1) {
|
||||||
|
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
|
||||||
|
PFETCH(c);
|
||||||
|
if (c == ')') break;
|
||||||
|
}
|
||||||
|
goto start;
|
||||||
|
}
|
||||||
|
PUNFETCH;
|
||||||
|
}
|
||||||
|
|
||||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
|
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
|
||||||
tok->type = TK_SUBEXP_OPEN;
|
tok->type = TK_SUBEXP_OPEN;
|
||||||
break;
|
break;
|
||||||
@ -4024,16 +4054,6 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
|
|||||||
|
|
||||||
PFETCH(c);
|
PFETCH(c);
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '#': /* (?#...) comment */
|
|
||||||
while (1) {
|
|
||||||
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
|
|
||||||
PFETCH(c);
|
|
||||||
if (c == ')') break;
|
|
||||||
}
|
|
||||||
*src = p;
|
|
||||||
return 3; /* 3: comment */
|
|
||||||
break;
|
|
||||||
|
|
||||||
case ':': /* (?:...) grouping only */
|
case ':': /* (?:...) grouping only */
|
||||||
group:
|
group:
|
||||||
r = fetch_token(tok, &p, end, env);
|
r = fetch_token(tok, &p, end, env);
|
||||||
@ -4231,6 +4251,15 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static char* PopularQStr[] = {
|
||||||
|
"?", "*", "+", "??", "*?", "+?"
|
||||||
|
};
|
||||||
|
|
||||||
|
static char* ReduceQStr[] = {
|
||||||
|
"", "", "*", "*?", "??", "+ and ??", "+? and ?"
|
||||||
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||||
{
|
{
|
||||||
@ -4263,11 +4292,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
|||||||
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||||
if (qn->by_number == 0 && qnt->by_number == 0 &&
|
if (qn->by_number == 0 && qnt->by_number == 0 &&
|
||||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
|
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
|
||||||
if (IS_REPEAT_INFINITE(qn->upper)) {
|
int nestq_num, targetq_num;
|
||||||
if (qn->lower == 0) { /* '*' */
|
|
||||||
redundant:
|
|
||||||
{
|
|
||||||
char buf[WARN_BUFSIZE];
|
char buf[WARN_BUFSIZE];
|
||||||
|
|
||||||
|
nestq_num = popular_qualifier_num(qn);
|
||||||
|
targetq_num = popular_qualifier_num(qnt);
|
||||||
|
|
||||||
|
switch(ReduceTypeTable[targetq_num][nestq_num]) {
|
||||||
|
case RQ_ASIS:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case RQ_DEL:
|
||||||
if (onig_verb_warn != onig_null_warn) {
|
if (onig_verb_warn != onig_null_warn) {
|
||||||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||||||
env->pattern, env->pattern_end,
|
env->pattern, env->pattern_end,
|
||||||
@ -4275,25 +4310,19 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
|||||||
(*onig_verb_warn)(buf);
|
(*onig_verb_warn)(buf);
|
||||||
}
|
}
|
||||||
goto warn_exit;
|
goto warn_exit;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (onig_verb_warn != onig_null_warn) {
|
||||||
|
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||||||
|
env->pattern, env->pattern_end,
|
||||||
|
"nested repeat operator '%s and %s' should be replaced with '%s'",
|
||||||
|
PopularQStr[targetq_num], PopularQStr[nestq_num],
|
||||||
|
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
|
||||||
|
(*onig_verb_warn)(buf);
|
||||||
}
|
}
|
||||||
}
|
goto warn_exit;
|
||||||
else if (qn->lower == 1) { /* '+' */
|
break;
|
||||||
/* (?:a?)+? only allowed. */
|
|
||||||
if (qn->greedy || !(qnt->upper == 1 && qnt->greedy))
|
|
||||||
goto redundant;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (qn->upper == 1 && qn->lower == 0) {
|
|
||||||
if (qn->greedy) { /* '?' */
|
|
||||||
if (!(qnt->lower == 1 && qnt->greedy == 0)) /* not '+?' */
|
|
||||||
goto redundant;
|
|
||||||
}
|
|
||||||
else { /* '??' */
|
|
||||||
/* (?:a+)?? only allowed. (?:a*)?? can be replaced with (?:a+)?? */
|
|
||||||
if (!(qnt->greedy && qnt->lower == 1 &&
|
|
||||||
IS_REPEAT_INFINITE(qnt->upper)))
|
|
||||||
goto redundant;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4392,7 +4421,6 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||||||
Node* qn;
|
Node* qn;
|
||||||
Node** targetp;
|
Node** targetp;
|
||||||
|
|
||||||
start:
|
|
||||||
*np = NULL;
|
*np = NULL;
|
||||||
if (tok->type == term)
|
if (tok->type == term)
|
||||||
goto end_of_token;
|
goto end_of_token;
|
||||||
@ -4422,11 +4450,6 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||||||
NEFFECT(*np).target = target;
|
NEFFECT(*np).target = target;
|
||||||
return tok->type;
|
return tok->type;
|
||||||
}
|
}
|
||||||
else if (r == 3) { /* comment */
|
|
||||||
r = fetch_token(tok, src, end, env);
|
|
||||||
if (r < 0) return r;
|
|
||||||
goto start;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TK_SUBEXP_CLOSE:
|
case TK_SUBEXP_CLOSE:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user