Warn about duplicated characters in a character class of a regexp. [ruby-core:24593]

* include/ruby/oniguruma.h (ONIG_SYN_WARN_CC_DUP): defined.
* regparse.h (ScanEnv): add warnings_flag.
* regparse.c (CC_DUP_WARN): defined to warn about duplicated characters
  in a character class of a regexp. [ruby-core:24593]
  (add_code_range_to_buf): add CC_DUP_WARN.
  (next_state_val): add CC_DUP_WARN.
  (OnigSyntaxRuby): add ONIG_SYN_WARN_CC_DUP.
  (SET_ALL_MULTI_BYTE_RANGE): add env to arguments.
  (add_code_range): ditto.
  (add_code_range_to_buf): ditto.
  (not_code_range_buf): ditto.
  (or_code_range_buf): ditto.
  (and_code_range1): ditto.
  (and_code_range_buf): ditto.
  (and_cclass): ditto.
  (or_cclass): ditto.
  (add_ctype_to_cc_by_range): ditto.
  (add_ctype_to_cc): ditto.
  (parse_char_class): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24387 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2009-08-04 04:14:38 +00:00
parent 38107457a3
commit 60ab77a16a
4 changed files with 103 additions and 51 deletions

View File

@ -1,3 +1,25 @@
Tue Aug 4 13:07:10 2009 NARUSE, Yui <naruse@ruby-lang.org>
* include/ruby/oniguruma.h (ONIG_SYN_WARN_CC_DUP): defined.
* regparse.h (ScanEnv): add warnings_flag.
* regparse.c (CC_DUP_WARN): defined to warn about duplicated characters
in a character class of a regexp. [ruby-core:24593]
(add_code_range_to_buf): add CC_DUP_WARN.
(next_state_val): add CC_DUP_WARN.
(OnigSyntaxRuby): add ONIG_SYN_WARN_CC_DUP.
(SET_ALL_MULTI_BYTE_RANGE): add env to arguments.
(add_code_range): ditto.
(add_code_range_to_buf): ditto.
(not_code_range_buf): ditto.
(or_code_range_buf): ditto.
(and_code_range1): ditto.
(and_code_range_buf): ditto.
(and_cclass): ditto.
(or_cclass): ditto.
(add_ctype_to_cc_by_range): ditto.
(add_ctype_to_cc): ditto.
(parse_char_class): ditto.
Tue Aug 4 12:40:45 2009 NARUSE, Yui <naruse@ruby-lang.org> Tue Aug 4 12:40:45 2009 NARUSE, Yui <naruse@ruby-lang.org>
* enc/encdb.c (ENC_SET_BASE): fix typo. * enc/encdb.c (ENC_SET_BASE): fix typo.

View File

@ -466,6 +466,7 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
/* syntax (behavior) warning */ /* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ #define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ #define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
/* meta character specifiers (onig_set_meta_char()) */ /* meta character specifiers (onig_set_meta_char()) */
#define ONIG_META_CHAR_ESCAPE 0 #define ONIG_META_CHAR_ESCAPE 0

View File

@ -57,6 +57,7 @@ const OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_CC_DUP |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE , ONIG_OPTION_NONE
, ,
@ -96,6 +97,8 @@ extern void onig_set_verb_warn_func(OnigWarnFunc f)
onig_verb_warn = f; onig_verb_warn = f;
} }
static void CC_DUP_WARN(ScanEnv *env);
static void static void
bbuf_free(BBuf* bbuf) bbuf_free(BBuf* bbuf)
{ {
@ -129,7 +132,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
(OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
@ -139,6 +142,11 @@ bbuf_clone(BBuf** rto, BBuf* from)
} while (0) } while (0)
/*
 * Set bit `pos` in bitset `bs`, calling CC_DUP_WARN first when that bit is
 * already set (i.e. the same single-byte value is being added twice).
 *
 * Caveats:
 *  - The expansion refers to a variable named `env` that is NOT a macro
 *    parameter; a suitable `env` must be in scope wherever this is used.
 *  - `pos` appears several times in the expansion, so it must not have
 *    side effects.
 */
#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
BS_ROOM(bs, pos) |= BS_BIT(pos); \
} while (0)
#define BITSET_IS_EMPTY(bs,empty) do {\ #define BITSET_IS_EMPTY(bs,empty) do {\
int i;\ int i;\
empty = 1;\ empty = 1;\
@ -150,11 +158,11 @@ bbuf_clone(BBuf** rto, BBuf* from)
} while (0) } while (0)
static void static void
bitset_set_range(BitSetRef bs, int from, int to) bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
{ {
int i; int i;
for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
BITSET_SET_BIT(bs, i); BITSET_SET_BIT_CHKDUP(bs, i);
} }
} }
@ -1689,7 +1697,7 @@ new_code_range(BBuf** pbuf)
} }
static int static int
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
{ {
int r, inc_n, pos; int r, inc_n, pos;
int low, high, bound, x; int low, high, bound, x;
@ -1736,8 +1744,10 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
if (inc_n != 1) { if (inc_n != 1) {
if (from > data[low*2]) if (from > data[low*2])
from = data[low*2]; from = data[low*2];
else CC_DUP_WARN(env);
if (to < data[(high - 1)*2 + 1]) if (to < data[(high - 1)*2 + 1])
to = data[(high - 1)*2 + 1]; to = data[(high - 1)*2 + 1];
else CC_DUP_WARN(env);
} }
if (inc_n != 0 && (OnigCodePoint )high < n) { if (inc_n != 0 && (OnigCodePoint )high < n) {
@ -1773,11 +1783,11 @@ add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
} }
return add_code_range_to_buf(pbuf, from, to); return add_code_range_to_buf(pbuf, env, from, to);
} }
static int static int
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
{ {
int r, i, n; int r, i, n;
OnigCodePoint pre, from, *data, to = 0; OnigCodePoint pre, from, *data, to = 0;
@ -1799,14 +1809,14 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
from = data[i*2]; from = data[i*2];
to = data[i*2+1]; to = data[i*2+1];
if (pre <= from - 1) { if (pre <= from - 1) {
r = add_code_range_to_buf(pbuf, pre, from - 1); r = add_code_range_to_buf(pbuf, env, pre, from - 1);
if (r != 0) return r; if (r != 0) return r;
} }
if (to == ~((OnigCodePoint )0)) break; if (to == ~((OnigCodePoint )0)) break;
pre = to + 1; pre = to + 1;
} }
if (to < ~((OnigCodePoint )0)) { if (to < ~((OnigCodePoint )0)) {
r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
} }
return r; return r;
} }
@ -1820,7 +1830,7 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
static int static int
or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
BBuf* bbuf2, int not2, BBuf** pbuf) BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
{ {
int r; int r;
OnigCodePoint i, n1, *data1; OnigCodePoint i, n1, *data1;
@ -1846,7 +1856,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
return bbuf_clone(pbuf, bbuf2); return bbuf_clone(pbuf, bbuf2);
} }
else { else {
return not_code_range_buf(enc, bbuf2, pbuf); return not_code_range_buf(enc, bbuf2, pbuf, env);
} }
} }
} }
@ -1862,21 +1872,21 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
r = bbuf_clone(pbuf, bbuf2); r = bbuf_clone(pbuf, bbuf2);
} }
else if (not1 == 0) { /* 1 OR (not 2) */ else if (not1 == 0) { /* 1 OR (not 2) */
r = not_code_range_buf(enc, bbuf2, pbuf); r = not_code_range_buf(enc, bbuf2, pbuf, env);
} }
if (r != 0) return r; if (r != 0) return r;
for (i = 0; i < n1; i++) { for (i = 0; i < n1; i++) {
from = data1[i*2]; from = data1[i*2];
to = data1[i*2+1]; to = data1[i*2+1];
r = add_code_range_to_buf(pbuf, from, to); r = add_code_range_to_buf(pbuf, env, from, to);
if (r != 0) return r; if (r != 0) return r;
} }
return 0; return 0;
} }
static int static int
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
OnigCodePoint* data, int n) OnigCodePoint* data, int n)
{ {
int i, r; int i, r;
@ -1894,7 +1904,7 @@ and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
else if (from2 <= to1) { else if (from2 <= to1) {
if (to2 < to1) { if (to2 < to1) {
if (from1 <= from2 - 1) { if (from1 <= from2 - 1) {
r = add_code_range_to_buf(pbuf, from1, from2-1); r = add_code_range_to_buf(pbuf, env, from1, from2-1);
if (r != 0) return r; if (r != 0) return r;
} }
from1 = to2 + 1; from1 = to2 + 1;
@ -1909,14 +1919,14 @@ and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
if (from1 > to1) break; if (from1 > to1) break;
} }
if (from1 <= to1) { if (from1 <= to1) {
r = add_code_range_to_buf(pbuf, from1, to1); r = add_code_range_to_buf(pbuf, env, from1, to1);
if (r != 0) return r; if (r != 0) return r;
} }
return 0; return 0;
} }
static int static int
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
{ {
int r; int r;
OnigCodePoint i, j, n1, n2, *data1, *data2; OnigCodePoint i, j, n1, n2, *data1, *data2;
@ -1955,7 +1965,7 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
if (to2 < from1) continue; if (to2 < from1) continue;
from = MAX(from1, from2); from = MAX(from1, from2);
to = MIN(to1, to2); to = MIN(to1, to2);
r = add_code_range_to_buf(pbuf, from, to); r = add_code_range_to_buf(pbuf, env, from, to);
if (r != 0) return r; if (r != 0) return r;
} }
} }
@ -1964,7 +1974,7 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
for (i = 0; i < n1; i++) { for (i = 0; i < n1; i++) {
from1 = data1[i*2]; from1 = data1[i*2];
to1 = data1[i*2+1]; to1 = data1[i*2+1];
r = and_code_range1(pbuf, from1, to1, data2, n2); r = and_code_range1(pbuf, env, from1, to1, data2, n2);
if (r != 0) return r; if (r != 0) return r;
} }
} }
@ -1973,8 +1983,9 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
} }
static int static int
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
{ {
OnigEncoding enc = env->enc;
int r, not1, not2; int r, not1, not2;
BBuf *buf1, *buf2, *pbuf; BBuf *buf1, *buf2, *pbuf;
BitSetRef bsr1, bsr2; BitSetRef bsr1, bsr2;
@ -2006,13 +2017,13 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
if (! ONIGENC_IS_SINGLEBYTE(enc)) { if (! ONIGENC_IS_SINGLEBYTE(enc)) {
if (not1 != 0 && not2 != 0) { if (not1 != 0 && not2 != 0) {
r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
} }
else { else {
r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
if (r == 0 && not1 != 0) { if (r == 0 && not1 != 0) {
BBuf *tbuf; BBuf *tbuf;
r = not_code_range_buf(enc, pbuf, &tbuf); r = not_code_range_buf(enc, pbuf, &tbuf, env);
if (r != 0) { if (r != 0) {
bbuf_free(pbuf); bbuf_free(pbuf);
return r; return r;
@ -2031,8 +2042,9 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
} }
static int static int
or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
{ {
OnigEncoding enc = env->enc;
int r, not1, not2; int r, not1, not2;
BBuf *buf1, *buf2, *pbuf; BBuf *buf1, *buf2, *pbuf;
BitSetRef bsr1, bsr2; BitSetRef bsr1, bsr2;
@ -2064,13 +2076,13 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
if (! ONIGENC_IS_SINGLEBYTE(enc)) { if (! ONIGENC_IS_SINGLEBYTE(enc)) {
if (not1 != 0 && not2 != 0) { if (not1 != 0 && not2 != 0) {
r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
} }
else { else {
r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
if (r == 0 && not1 != 0) { if (r == 0 && not1 != 0) {
BBuf *tbuf; BBuf *tbuf;
r = not_code_range_buf(enc, pbuf, &tbuf); r = not_code_range_buf(enc, pbuf, &tbuf, env);
if (r != 0) { if (r != 0) {
bbuf_free(pbuf); bbuf_free(pbuf);
return r; return r;
@ -2839,6 +2851,22 @@ CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
} }
} }
/*
 * Emit the "character class has duplicated range" warning for the pattern
 * held in env (env->pattern .. env->pattern_end).
 *
 * Nothing is reported when the warning callback is the null handler or when
 * ruby_verbose is not a true value.  Otherwise the warning is issued only if
 * the syntax has ONIG_SYN_WARN_CC_DUP enabled and this env has not recorded
 * the warning yet; the bit stored in env->warnings_flag makes later calls
 * with the same env silent (unless that flag is cleared elsewhere).
 */
static void
CC_DUP_WARN(ScanEnv *env)
{
UChar buf[WARN_BUFSIZE];
/* Warnings are off: no real handler installed, or ruby_verbose is false/nil. */
if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) &&
!((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
/* Latch the flag before reporting so repeated duplicates in this env stay quiet. */
(env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
/* NOTE(review): presumably formats the message together with the pattern text
   into buf -- confirm against onig_snprintf_with_pattern. */
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
(UChar* )"character class has duplicated range");
(*onig_warn)((char* )buf);
}
}
static UChar* static UChar*
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
UChar **next, OnigEncoding enc) UChar **next, OnigEncoding enc)
@ -3756,7 +3784,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
OnigEncoding enc ARG_UNUSED, ScanEnv* env,
OnigCodePoint sb_out, const OnigCodePoint mbr[]) OnigCodePoint sb_out, const OnigCodePoint mbr[])
{ {
int i, r; int i, r;
@ -3771,7 +3799,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
if (j >= sb_out) { if (j >= sb_out) {
if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++; if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), j, r = add_code_range_to_buf(&(cc->mbuf), env, j,
ONIGENC_CODE_RANGE_TO(mbr, i)); ONIGENC_CODE_RANGE_TO(mbr, i));
if (r != 0) return r; if (r != 0) return r;
i++; i++;
@ -3779,13 +3807,13 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
goto sb_end; goto sb_end;
} }
BITSET_SET_BIT(cc->bs, j); BITSET_SET_BIT_CHKDUP(cc->bs, j);
} }
} }
sb_end: sb_end:
for ( ; i < n; i++) { for ( ; i < n; i++) {
r = add_code_range_to_buf(&(cc->mbuf), r = add_code_range_to_buf(&(cc->mbuf), env,
ONIGENC_CODE_RANGE_FROM(mbr, i), ONIGENC_CODE_RANGE_FROM(mbr, i),
ONIGENC_CODE_RANGE_TO(mbr, i)); ONIGENC_CODE_RANGE_TO(mbr, i));
if (r != 0) return r; if (r != 0) return r;
@ -3800,12 +3828,12 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
if (j >= sb_out) { if (j >= sb_out) {
goto sb_end2; goto sb_end2;
} }
BITSET_SET_BIT(cc->bs, j); BITSET_SET_BIT_CHKDUP(cc->bs, j);
} }
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
} }
for (j = prev; j < sb_out; j++) { for (j = prev; j < sb_out; j++) {
BITSET_SET_BIT(cc->bs, j); BITSET_SET_BIT_CHKDUP(cc->bs, j);
} }
sb_end2: sb_end2:
@ -3813,14 +3841,14 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), prev, r = add_code_range_to_buf(&(cc->mbuf), env, prev,
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
if (r != 0) return r; if (r != 0) return r;
} }
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
} }
if (prev < 0x7fffffff) { if (prev < 0x7fffffff) {
r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
if (r != 0) return r; if (r != 0) return r;
} }
} }
@ -3838,7 +3866,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) { if (r == 0) {
return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
} }
else if (r != ONIG_NO_SUPPORT_CONFIG) { else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r; return r;
@ -3860,14 +3888,14 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
if (not != 0) { if (not != 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c); BITSET_SET_BIT_CHKDUP(cc->bs, c);
} }
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
} }
else { else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c); BITSET_SET_BIT_CHKDUP(cc->bs, c);
} }
} }
break; break;
@ -3877,13 +3905,13 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
if (not != 0) { if (not != 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c); BITSET_SET_BIT_CHKDUP(cc->bs, c);
} }
} }
else { else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c); BITSET_SET_BIT_CHKDUP(cc->bs, c);
} }
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
} }
@ -3892,7 +3920,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
case ONIGENC_CTYPE_WORD: case ONIGENC_CTYPE_WORD:
if (not == 0) { if (not == 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
} }
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
} }
@ -3900,7 +3928,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
for (c = 0; c < SINGLE_BYTE_SIZE; c++) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
&& ! ONIGENC_IS_CODE_WORD(enc, c)) && ! ONIGENC_IS_CODE_WORD(enc, c))
BITSET_SET_BIT(cc->bs, c); BITSET_SET_BIT_CHKDUP(cc->bs, c);
} }
} }
break; break;
@ -4065,7 +4093,7 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
if (*state == CCS_VALUE && *type != CCV_CLASS) { if (*state == CCS_VALUE && *type != CCV_CLASS) {
if (*type == CCV_SB) if (*type == CCV_SB)
BITSET_SET_BIT(cc->bs, (int )(*vs)); BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
else if (*type == CCV_CODE_POINT) { else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs); r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r; if (r < 0) return r;
@ -4088,7 +4116,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
switch (*state) { switch (*state) {
case CCS_VALUE: case CCS_VALUE:
if (*type == CCV_SB) if (*type == CCV_SB)
BITSET_SET_BIT(cc->bs, (int )(*vs)); BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
else if (*type == CCV_CODE_POINT) { else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs); r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r; if (r < 0) return r;
@ -4107,7 +4135,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
else else
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
} }
bitset_set_range(cc->bs, (int )*vs, (int )v); bitset_set_range(env, cc->bs, (int )*vs, (int )v);
} }
else { else {
r = add_code_range(&(cc->mbuf), env, *vs, v); r = add_code_range(&(cc->mbuf), env, *vs, v);
@ -4124,7 +4152,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
else else
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
} }
bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
if (r < 0) return r; if (r < 0) return r;
#if 0 #if 0
@ -4407,7 +4435,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = parse_char_class(&anode, tok, &p, end, env); r = parse_char_class(&anode, tok, &p, end, env);
if (r != 0) goto cc_open_err; if (r != 0) goto cc_open_err;
acc = NCCLASS(anode); acc = NCCLASS(anode);
r = or_cclass(cc, acc, env->enc); r = or_cclass(cc, acc, env);
onig_node_free(anode); onig_node_free(anode);
cc_open_err: cc_open_err:
@ -4427,7 +4455,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
state = CCS_START; state = CCS_START;
if (IS_NOT_NULL(prev_cc)) { if (IS_NOT_NULL(prev_cc)) {
r = and_cclass(prev_cc, cc, env->enc); r = and_cclass(prev_cc, cc, env);
if (r != 0) goto err; if (r != 0) goto err;
bbuf_free(cc->mbuf); bbuf_free(cc->mbuf);
} }
@ -4464,7 +4492,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
} }
if (IS_NOT_NULL(prev_cc)) { if (IS_NOT_NULL(prev_cc)) {
r = and_cclass(prev_cc, cc, env->enc); r = and_cclass(prev_cc, cc, env);
if (r != 0) goto err; if (r != 0) goto err;
bbuf_free(cc->mbuf); bbuf_free(cc->mbuf);
cc = prev_cc; cc = prev_cc;
@ -4487,7 +4515,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
BITSET_SET_BIT(cc->bs, NEWLINE_CODE); BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
else else
add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
} }
@ -4983,7 +5011,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
add_code_range(&(cc->mbuf), env, *to, *to); add_code_range(&(cc->mbuf), env, *to, *to);
} }
else { else {
BITSET_SET_BIT(bs, *to); BITSET_SET_BIT_CHKDUP(bs, *to);
} }
} }
#else #else
@ -4997,7 +5025,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
BITSET_CLEAR_BIT(bs, *to); BITSET_CLEAR_BIT(bs, *to);
} }
else else
BITSET_SET_BIT(bs, *to); BITSET_SET_BIT_CHKDUP(bs, *to);
} }
} }
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */

View File

@ -306,6 +306,7 @@ typedef struct {
int curr_max_regnum; int curr_max_regnum;
int has_recursion; int has_recursion;
#endif #endif
int warnings_flag;
} ScanEnv; } ScanEnv;