* Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f

v5.13.3 [Bug#7972] [Bug#7974]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@39547 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2013-03-01 16:36:37 +00:00
parent 76855753d7
commit 78dbaa1648
8 changed files with 80 additions and 56 deletions

View File

@ -1,3 +1,8 @@
Sat Mar 2 01:33:17 2013 NARUSE, Yui <naruse@ruby-lang.org>
* Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f
v5.13.3 [Bug#7972] [Bug#7974]
Fri Mar 1 11:09:06 2013 Eric Hodel <drbrain@segment7.net> Fri Mar 1 11:09:06 2013 Eric Hodel <drbrain@segment7.net>
* lib/fileutils.rb: Revert r34669 which altered the way * lib/fileutils.rb: Revert r34669 which altered the way

View File

@ -231,7 +231,7 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
} }
else if (code <= 0xffff) { else if (code <= 0xffff) {
int low = code & 0xff; int low = code & 0xff;
if (low < 0x40 || low == 0x7f || 0xfc < low) if (! SJIS_ISMB_TRAIL(low))
return ONIGERR_INVALID_CODE_POINT_VALUE; return ONIGERR_INVALID_CODE_POINT_VALUE;
return 2; return 2;
} }

View File

@ -40,7 +40,7 @@ extern "C" {
#define ONIGURUMA #define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5 #define ONIGURUMA_VERSION_MAJOR 5
#define ONIGURUMA_VERSION_MINOR 13 #define ONIGURUMA_VERSION_MINOR 13
#define ONIGURUMA_VERSION_TEENY 1 #define ONIGURUMA_VERSION_TEENY 3
#ifdef __cplusplus #ifdef __cplusplus
# ifndef HAVE_PROTOTYPES # ifndef HAVE_PROTOTYPES

View File

@ -3,7 +3,7 @@
**********************************************************************/ **********************************************************************/
/*- /*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp> * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -118,7 +118,7 @@ static int
bitset_is_empty(BitSetRef bs) bitset_is_empty(BitSetRef bs)
{ {
int i; int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { for (i = 0; i < BITSET_SIZE; i++) {
if (bs[i] != 0) return 0; if (bs[i] != 0) return 0;
} }
return 1; return 1;
@ -4311,7 +4311,7 @@ typedef struct {
OptAncInfo anc; OptAncInfo anc;
int reach_end; int reach_end;
int ignore_case; int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
int len; int len;
UChar s[OPT_EXACT_MAXLEN]; UChar s[OPT_EXACT_MAXLEN];
} OptExactInfo; } OptExactInfo;
@ -4548,7 +4548,7 @@ clear_opt_exact_info(OptExactInfo* ex)
clear_mml(&ex->mmd); clear_mml(&ex->mmd);
clear_opt_anc_info(&ex->anc); clear_opt_anc_info(&ex->anc);
ex->reach_end = 0; ex->reach_end = 0;
ex->ignore_case = 0; ex->ignore_case = -1; /* unset */
ex->len = 0; ex->len = 0;
ex->s[0] = '\0'; ex->s[0] = '\0';
} }
@ -4566,11 +4566,10 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
UChar *p, *end; UChar *p, *end;
OptAncInfo tanc; OptAncInfo tanc;
if (! to->ignore_case && add->ignore_case) { if (to->ignore_case < 0)
if (to->len >= add->len) return ; /* avoid */ to->ignore_case = add->ignore_case;
else if (to->ignore_case != add->ignore_case)
to->ignore_case = 1; return ; /* avoid */
}
p = add->s; p = add->s;
end = p + add->len; end = p + add->len;
@ -4636,7 +4635,10 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
to->reach_end = 0; to->reach_end = 0;
} }
to->len = i; to->len = i;
to->ignore_case |= add->ignore_case; if (to->ignore_case < 0)
to->ignore_case = add->ignore_case;
else if (add->ignore_case >= 0)
to->ignore_case |= add->ignore_case;
alt_merge_opt_anc_info(&to->anc, &add->anc); alt_merge_opt_anc_info(&to->anc, &add->anc);
if (! to->reach_end) to->anc.right_anchor = 0; if (! to->reach_end) to->anc.right_anchor = 0;
@ -4666,8 +4668,8 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
if (alt->len > 1) v2 += 5; if (alt->len > 1) v2 += 5;
} }
if (now->ignore_case == 0) v1 *= 2; if (now->ignore_case <= 0) v1 *= 2;
if (alt->ignore_case == 0) v2 *= 2; if (alt->ignore_case <= 0) v2 *= 2;
if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
copy_opt_exact_info(now, alt); copy_opt_exact_info(now, alt);
@ -4765,7 +4767,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
if (m->value <= 0) return -1; if (m->value <= 0) return -1;
ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
vm = COMP_EM_BASE * 5 * 2 / m->value; vm = COMP_EM_BASE * 5 * 2 / m->value;
return comp_distance_value(&e->mmd, &m->mmd, ve, vm); return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
} }
@ -4947,7 +4949,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (! NSTRING_IS_AMBIG(node)) { if (! NSTRING_IS_AMBIG(node)) {
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
NSTRING_IS_RAW(node), env->enc); is_raw, env->enc);
opt->exb.ignore_case = 0;
if (slen > 0) { if (slen > 0) {
add_char_opt_map_info(&opt->map, *(sn->s), env->enc); add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
} }
@ -5260,7 +5263,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
allow_reverse = allow_reverse =
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->ignore_case) { if (e->ignore_case > 0) {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
r = set_bm_skip(reg->exact, reg->exact_end, reg, r = set_bm_skip(reg->exact, reg->exact_end, reg,
reg->map, &(reg->int_map), 1); reg->map, &(reg->int_map), 1);

View File

@ -2559,7 +2559,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
(int )mem, (intptr_t )s, s); (int )mem, (intptr_t )s, s);
#endif #endif
if (isnull == -1) goto fail; if (isnull == -1) goto fail;
goto null_check_found; goto null_check_found;
} }
} }
MOP_OUT; MOP_OUT;
@ -2585,7 +2585,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
(int )mem, (intptr_t )s, s); (int )mem, (intptr_t )s, s);
#endif #endif
if (isnull == -1) goto fail; if (isnull == -1) goto fail;
goto null_check_found; goto null_check_found;
} }
else { else {
STACK_PUSH_NULL_CHECK_END(mem); STACK_PUSH_NULL_CHECK_END(mem);

View File

@ -390,7 +390,7 @@ typedef unsigned int BitStatusType;
/* bitset */ /* bitset */
#define BITS_PER_BYTE 8 #define BITS_PER_BYTE 8
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) #define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM ((int)sizeof(Bits) * BITS_PER_BYTE) #define BITS_IN_ROOM ((int )sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) #define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@ -405,11 +405,11 @@ typedef Bits* BitSetRef;
#define BITSET_CLEAR(bs) do {\ #define BITSET_CLEAR(bs) do {\
int i;\ int i;\
for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; } \
} while (0) } while (0)
#define BS_ROOM(bs,pos) (bs)[(int)(pos) / BITS_IN_ROOM] #define BS_ROOM(bs,pos) (bs)[(int )(pos) / BITS_IN_ROOM]
#define BS_BIT(pos) (1 << ((int)(pos) % BITS_IN_ROOM)) #define BS_BIT(pos) (1 << ((int )(pos) % BITS_IN_ROOM))
#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) #define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) #define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
@ -457,7 +457,7 @@ typedef struct _BBuf {
#define BBUF_WRITE1(buf,pos,byte) do{\ #define BBUF_WRITE1(buf,pos,byte) do{\
int used = (pos) + 1;\ int used = (pos) + 1;\
if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
(buf)->p[(pos)] = (byte);\ (buf)->p[(pos)] = (UChar )(byte);\
if ((buf)->used < (unsigned int )used) (buf)->used = used;\ if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0) } while (0)

View File

@ -3,7 +3,7 @@
**********************************************************************/ **********************************************************************/
/*- /*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp> * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -156,7 +156,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
#define BITSET_IS_EMPTY(bs,empty) do {\ #define BITSET_IS_EMPTY(bs,empty) do {\
int i;\ int i;\
empty = 1;\ empty = 1;\
for (i = 0; i < (int )BITSET_SIZE; i++) {\ for (i = 0; i < BITSET_SIZE; i++) {\
if ((bs)[i] != 0) {\ if ((bs)[i] != 0) {\
empty = 0; break;\ empty = 0; break;\
}\ }\
@ -185,35 +185,35 @@ static void
bitset_invert(BitSetRef bs) bitset_invert(BitSetRef bs)
{ {
int i; int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
} }
static void static void
bitset_invert_to(BitSetRef from, BitSetRef to) bitset_invert_to(BitSetRef from, BitSetRef to)
{ {
int i; int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
} }
static void static void
bitset_and(BitSetRef dest, BitSetRef bs) bitset_and(BitSetRef dest, BitSetRef bs)
{ {
int i; int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
} }
static void static void
bitset_or(BitSetRef dest, BitSetRef bs) bitset_or(BitSetRef dest, BitSetRef bs)
{ {
int i; int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
} }
static void static void
bitset_copy(BitSetRef dest, BitSetRef bs) bitset_copy(BitSetRef dest, BitSetRef bs)
{ {
int i; int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
} }
extern int extern int
@ -425,9 +425,6 @@ typedef struct {
typedef st_table NameTable; typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
#define NAMEBUF_SIZE 24
#define NAMEBUF_SIZE_1 25
#ifdef ONIG_DEBUG #ifdef ONIG_DEBUG
static int static int
i_print_name_entry(UChar* key, NameEntry* e, void* arg) i_print_name_entry(UChar* key, NameEntry* e, void* arg)
@ -589,7 +586,7 @@ onig_number_of_names(regex_t* reg)
NameTable* t = (NameTable* )reg->name_table; NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) if (IS_NOT_NULL(t))
return (int)t->num_entries; return (int )t->num_entries;
else else
return 0; return 0;
} }
@ -2627,7 +2624,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
name_end = p; name_end = p;
PFETCH(c); PFETCH(c);
if (c == end_code || c == ')' || c == '+' || c == '-') { if (c == end_code || c == ')' || c == '+' || c == '-') {
if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
break; break;
} }
@ -2752,7 +2749,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
name_end = p; name_end = p;
PFETCH(c); PFETCH(c);
if (c == end_code || c == ')') { if (c == end_code || c == ')') {
if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
break; break;
} }
@ -4124,24 +4121,36 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) { if (r == 0) {
r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); if (ascii_range) {
if ((r == 0) && ascii_range) { CClassNode ccwork;
if (not != 0) { initialize_cclass(&ccwork);
r = add_code_range_to_buf0(&(cc->mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
} ranges);
else { if (r == 0) {
CClassNode ccascii; if (not) {
initialize_cclass(&ccascii); r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
} }
else { else {
bitset_set_range(env, ccascii.bs, 0x00, 0x7F); CClassNode ccascii;
initialize_cclass(&ccascii);
if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
}
else {
bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
}
r = and_cclass(&ccwork, &ccascii, env);
if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
} }
r = and_cclass(cc, &ccascii, env); if (r == 0) {
if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); r = or_cclass(cc, &ccwork, env);
}
if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
} }
} }
else {
r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
}
return r; return r;
} }
else if (r != ONIG_NO_SUPPORT_CONFIG) { else if (r != ONIG_NO_SUPPORT_CONFIG) {
@ -4562,7 +4571,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
UChar* psave = p; UChar* psave = p;
int i, base = tok->base; int i, base = tok->base;
buf[0] = tok->u.c; buf[0] = (UChar )tok->u.c;
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
r = fetch_token_in_cc(tok, &p, end, env); r = fetch_token_in_cc(tok, &p, end, env);
if (r < 0) goto err; if (r < 0) goto err;
@ -4570,7 +4579,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
fetched = 1; fetched = 1;
break; break;
} }
buf[i] = tok->u.c; buf[i] = (UChar )tok->u.c;
} }
if (i < ONIGENC_MBC_MINLEN(env->enc)) { if (i < ONIGENC_MBC_MINLEN(env->enc)) {
@ -4706,7 +4715,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
CC_ESC_WARN(env, (UChar* )"-"); CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
} }
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
goto err; goto err;
@ -5684,7 +5693,7 @@ countbits(unsigned int bits)
static int static int
is_onechar_cclass(CClassNode* cc, OnigCodePoint* code) is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
{ {
const OnigCodePoint not_found = (OnigCodePoint)-1; const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
OnigCodePoint c = not_found; OnigCodePoint c = not_found;
int i; int i;
BBuf *bbuf = cc->mbuf; BBuf *bbuf = cc->mbuf;
@ -5710,7 +5719,7 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
} }
/* check bitset */ /* check bitset */
for (i = 0; i < (int )BITSET_SIZE; i++) { for (i = 0; i < BITSET_SIZE; i++) {
Bits b1 = cc->bs[i]; Bits b1 = cc->bs[i];
if (b1 != 0) { if (b1 != 0) {
if (((b1 & (b1 - 1)) == 0) && (c == not_found)) { if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {

View File

@ -225,13 +225,20 @@ def parse_block(data)
blocks << constname blocks << constname
end end
# shim for Ruby 1.8
unless {}.respond_to?(:key)
class Hash
alias key index
end
end
$const_cache = {} $const_cache = {}
# make_const(property, pairs, name): Prints a 'static const' structure for a # make_const(property, pairs, name): Prints a 'static const' structure for a
# given property, group of paired codepoints, and a human-friendly name for # given property, group of paired codepoints, and a human-friendly name for
# the group # the group
def make_const(prop, data, name) def make_const(prop, data, name)
puts "\n/* '#{prop}': #{name} */" puts "\n/* '#{prop}': #{name} */"
if origprop = $const_cache.index(data) # don't use Hash#key because it is 1.9 feature if origprop = $const_cache.key(data)
puts "#define CR_#{prop} CR_#{origprop}" puts "#define CR_#{prop} CR_#{origprop}"
else else
$const_cache[prop] = data $const_cache[prop] = data