Add default cases to the cache point finding functions

This commit is contained in:
TSUYUSATO Kitsune 2022-11-17 13:58:08 +09:00 committed by Yusuke Endoh
parent 90bfac296e
commit 189e3c0ada
Notes: git 2022-11-17 14:19:46 +00:00

View File

@ -234,10 +234,8 @@ onig_get_capture_tree(OnigRegion* region)
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
/* count number of jump-like opcodes for allocation of cache memory. */ /* count number of jump-like opcodes for allocation of cache memory. */
/* return -1 if we cannot optimize the regex matching by using cache. */ static OnigPosition count_num_cache_opcode(regex_t* reg, long* num, long* table_size)
static long count_num_cache_opcode(regex_t* reg, long* table_size)
{ {
long num = 0;
UChar* p = reg->p; UChar* p = reg->p;
UChar* pend = p + reg->used; UChar* pend = p + reg->used;
LengthType len; LengthType len;
@ -246,6 +244,9 @@ static long count_num_cache_opcode(regex_t* reg, long* table_size)
long current_mem_num = 0; long current_mem_num = 0;
OnigEncoding enc = reg->enc; OnigEncoding enc = reg->enc;
*num = 0;
*table_size = 0;
while (p < pend) { while (p < pend) {
switch (*p++) { switch (*p++) {
case OP_FINISH: case OP_FINISH:
@ -298,10 +299,10 @@ static long count_num_cache_opcode(regex_t* reg, long* table_size)
break; break;
case OP_ANYCHAR_STAR: case OP_ANYCHAR_STAR:
case OP_ANYCHAR_ML_STAR: case OP_ANYCHAR_ML_STAR:
num++; *table_size += 1; break; *num += 1; *table_size += 1; break;
case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_STAR_PEEK_NEXT:
case OP_ANYCHAR_ML_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
p++; num++; *table_size += 1; break; p++; *num += 1; *table_size += 1; break;
case OP_WORD: case OP_WORD:
case OP_NOT_WORD: case OP_NOT_WORD:
@ -334,7 +335,7 @@ static long count_num_cache_opcode(regex_t* reg, long* table_size)
case OP_BACKREF_MULTI: case OP_BACKREF_MULTI:
case OP_BACKREF_MULTI_IC: case OP_BACKREF_MULTI_IC:
case OP_BACKREF_WITH_LEVEL: case OP_BACKREF_WITH_LEVEL:
return NUM_CACHE_OPCODE_FAIL; goto fail;
case OP_MEMORY_START: case OP_MEMORY_START:
case OP_MEMORY_START_PUSH: case OP_MEMORY_START_PUSH:
@ -354,43 +355,43 @@ static long count_num_cache_opcode(regex_t* reg, long* table_size)
break; break;
case OP_PUSH: case OP_PUSH:
p += SIZE_RELADDR; p += SIZE_RELADDR;
num++; *num += 1;
*table_size += 1; *table_size += 1;
break; break;
case OP_POP: case OP_POP:
break; break;
case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_OR_JUMP_EXACT1:
case OP_PUSH_IF_PEEK_NEXT: case OP_PUSH_IF_PEEK_NEXT:
p += SIZE_RELADDR + 1; num++; *table_size += 1; break; p += SIZE_RELADDR + 1; *num += 1; *table_size += 1; break;
case OP_REPEAT: case OP_REPEAT:
case OP_REPEAT_NG: case OP_REPEAT_NG:
if (current_mem != -1) { if (current_mem != -1) {
// A nested OP_REPEAT is not yet supported. // A nested OP_REPEAT is not yet supported.
return NUM_CACHE_OPCODE_FAIL; goto fail;
} }
GET_MEMNUM_INC(mem, p); GET_MEMNUM_INC(mem, p);
p += SIZE_RELADDR; p += SIZE_RELADDR;
if (reg->repeat_range[mem].lower == 0) { if (reg->repeat_range[mem].lower == 0) {
num++; *num += 1;
*table_size += 1; *table_size += 1;
} }
reg->repeat_range[mem].base_num = num; reg->repeat_range[mem].base_num = *num;
current_mem = mem; current_mem = mem;
current_mem_num = num; current_mem_num = *num;
break; break;
case OP_REPEAT_INC: case OP_REPEAT_INC:
case OP_REPEAT_INC_NG: case OP_REPEAT_INC_NG:
GET_MEMNUM_INC(mem, p); GET_MEMNUM_INC(mem, p);
if (mem != current_mem) { if (mem != current_mem) {
// A lone or invalid OP_REPEAT_INC is found. // A lone or invalid OP_REPEAT_INC is found.
return NUM_CACHE_OPCODE_FAIL; goto fail;
} }
{ {
long inner_num = num - current_mem_num; long inner_num = *num - current_mem_num;
OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
repeat_range->inner_num = inner_num; repeat_range->inner_num = inner_num;
num -= inner_num; *num -= inner_num;
num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); *num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
if (repeat_range->lower < repeat_range->upper) { if (repeat_range->lower < repeat_range->upper) {
*table_size += 1; *table_size += 1;
} }
@ -401,7 +402,7 @@ static long count_num_cache_opcode(regex_t* reg, long* table_size)
case OP_REPEAT_INC_SG: case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG: case OP_REPEAT_INC_NG_SG:
// TODO: Support nested OP_REPEAT. // TODO: Support nested OP_REPEAT.
return NUM_CACHE_OPCODE_FAIL; goto fail;
case OP_NULL_CHECK_START: case OP_NULL_CHECK_START:
case OP_NULL_CHECK_END: case OP_NULL_CHECK_END:
case OP_NULL_CHECK_END_MEMST: case OP_NULL_CHECK_END_MEMST:
@ -420,33 +421,43 @@ static long count_num_cache_opcode(regex_t* reg, long* table_size)
case OP_PUSH_ABSENT_POS: case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END: case OP_ABSENT_END:
case OP_ABSENT: case OP_ABSENT:
return NUM_CACHE_OPCODE_FAIL; goto fail;
case OP_CALL: case OP_CALL:
case OP_RETURN: case OP_RETURN:
return NUM_CACHE_OPCODE_FAIL; goto fail;
case OP_CONDITION: case OP_CONDITION:
return NUM_CACHE_OPCODE_FAIL; goto fail;
case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH:
case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK_PUSH_OR_JUMP:
case OP_STATE_CHECK: case OP_STATE_CHECK:
case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_STAR:
case OP_STATE_CHECK_ANYCHAR_ML_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR:
return NUM_CACHE_OPCODE_FAIL; goto fail;
case OP_SET_OPTION_PUSH: case OP_SET_OPTION_PUSH:
case OP_SET_OPTION: case OP_SET_OPTION:
p += SIZE_OPTION; p += SIZE_OPTION;
break; break;
default:
goto bytecode_error;
} }
} }
return num; return 0;
fail:
*num = NUM_CACHE_OPCODE_FAIL;
return 0;
bytecode_error:
return ONIGERR_UNDEFINED_BYTECODE;
} }
static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) static OnigPosition init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
{ {
UChar* pbegin; UChar* pbegin;
UChar* p = reg->p; UChar* p = reg->p;
@ -558,7 +569,7 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
case OP_BACKREF_MULTI: case OP_BACKREF_MULTI:
case OP_BACKREF_MULTI_IC: case OP_BACKREF_MULTI_IC:
case OP_BACKREF_WITH_LEVEL: case OP_BACKREF_WITH_LEVEL:
return; goto unexpected_bytecode_error;
case OP_MEMORY_START: case OP_MEMORY_START:
case OP_MEMORY_START_PUSH: case OP_MEMORY_START_PUSH:
@ -630,7 +641,7 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
case OP_REPEAT_INC_SG: case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG: case OP_REPEAT_INC_NG_SG:
// TODO: support OP_REPEAT opcodes. // TODO: support OP_REPEAT opcodes.
return; goto unexpected_bytecode_error;
case OP_NULL_CHECK_START: case OP_NULL_CHECK_START:
case OP_NULL_CHECK_END: case OP_NULL_CHECK_END:
case OP_NULL_CHECK_END_MEMST: case OP_NULL_CHECK_END_MEMST:
@ -649,28 +660,39 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
case OP_PUSH_ABSENT_POS: case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END: case OP_ABSENT_END:
case OP_ABSENT: case OP_ABSENT:
return; goto unexpected_bytecode_error;
case OP_CALL: case OP_CALL:
case OP_RETURN: case OP_RETURN:
return; goto unexpected_bytecode_error;
case OP_CONDITION: case OP_CONDITION:
return; goto unexpected_bytecode_error;
case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH:
case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK_PUSH_OR_JUMP:
case OP_STATE_CHECK: case OP_STATE_CHECK:
case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_STAR:
case OP_STATE_CHECK_ANYCHAR_ML_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR:
return; goto unexpected_bytecode_error;
case OP_SET_OPTION_PUSH: case OP_SET_OPTION_PUSH:
case OP_SET_OPTION: case OP_SET_OPTION:
p += SIZE_OPTION; p += SIZE_OPTION;
break; break;
default:
goto bytecode_error;
} }
} }
return 0;
unexpected_bytecode_error:
return ONIGERR_UNEXPECTED_BYTECODE;
bytecode_error:
return ONIGERR_UNDEFINED_BYTECODE;
} }
#endif /* USE_CACHE_MATCH_OPT */ #endif /* USE_CACHE_MATCH_OPT */
@ -861,6 +883,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
(msa).enable_cache_match_opt = 0;\ (msa).enable_cache_match_opt = 0;\
(msa).num_fail = 0;\ (msa).num_fail = 0;\
(msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\
(msa).num_cache_table = 0;\
(msa).cache_index_table = (OnigCacheIndex *)0;\ (msa).cache_index_table = (OnigCacheIndex *)0;\
(msa).match_cache = (uint8_t *)0;\ (msa).match_cache = (uint8_t *)0;\
} while(0) } while(0)
@ -3820,23 +3843,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
if (++msa->num_fail >= (long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { if (++msa->num_fail >= (long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
long table_size = 0;
msa->enable_cache_match_opt = 1; msa->enable_cache_match_opt = 1;
if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
msa->num_cache_opcode = count_num_cache_opcode(reg, &table_size); OnigPosition r = count_num_cache_opcode(reg, &msa->num_cache_opcode, &msa->num_cache_table);
if (r < 0) goto bytecode_error;
} }
if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) {
msa->enable_cache_match_opt = 0; msa->enable_cache_match_opt = 0;
goto fail_match_cache_opt; goto fail_match_cache_opt;
} }
if (msa->cache_index_table == NULL) { if (msa->cache_index_table == NULL) {
OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(table_size * sizeof(OnigCacheIndex)); OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_table * sizeof(OnigCacheIndex));
if (table == NULL) { if (table == NULL) {
return ONIGERR_MEMORY; return ONIGERR_MEMORY;
} }
init_cache_index_table(reg, table); OnigPosition r = init_cache_index_table(reg, table);
if (r < 0) {
if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
else goto bytecode_error;
}
msa->cache_index_table = table; msa->cache_index_table = table;
msa->num_cache_table = table_size;
} }
size_t len = (end - str) + 1; size_t len = (end - str) + 1;
size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len; size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len;