Revert "Refactor field names"

This reverts commit 1e6673d6bbd2adbf555d82c7c0906ceb148ed6ee.
This commit is contained in:
TSUYUSATO Kitsune 2022-11-08 15:13:27 +09:00 committed by Yusuke Endoh
parent 22294731a8
commit a1c1fc558a
3 changed files with 169 additions and 162 deletions

View File

@ -744,9 +744,8 @@ typedef struct {
typedef struct { typedef struct {
int lower; int lower;
int upper; int upper;
/* These fields are for cache optimization. */ int base_num;
int base_point; int inner_num;
int inner_point;
} OnigRepeatRange; } OnigRepeatRange;
typedef void (*OnigWarnFunc)(const char* s); typedef void (*OnigWarnFunc)(const char* s);

303
regexec.c
View File

@ -233,17 +233,19 @@ onig_get_capture_tree(OnigRegion* region)
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
static int count_num_cache_index(regex_t* reg) /* count number of jump-like opcodes for allocation of cache memory. */
/* return -1 if we cannot optimize the regex matching by using cache. */
static int count_num_cache_opcode(regex_t* reg, int* table_size)
{ {
UChar* p = reg->p; int num = 0;
UChar* pend = p + reg->used; UChar* p = reg->p;
LengthType len; UChar* pend = p + reg->used;
MemNumType mem; LengthType len;
MemNumType mem;
MemNumType current_mem = -1;
int current_mem_num = 0;
OnigEncoding enc = reg->enc; OnigEncoding enc = reg->enc;
int num_cache_index = 0;
MemNumType current_repeat = NO_OUTER_REPEAT;
while (p < pend) { while (p < pend) {
switch (*p++) { switch (*p++) {
case OP_FINISH: case OP_FINISH:
@ -296,10 +298,10 @@ static int count_num_cache_index(regex_t* reg)
break; break;
case OP_ANYCHAR_STAR: case OP_ANYCHAR_STAR:
case OP_ANYCHAR_ML_STAR: case OP_ANYCHAR_ML_STAR:
num_cache_index++; break; num++; *table_size += 1; break;
case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_STAR_PEEK_NEXT:
case OP_ANYCHAR_ML_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
p++; num_cache_index++; break; p++; num++; *table_size += 1; break;
case OP_WORD: case OP_WORD:
case OP_NOT_WORD: case OP_NOT_WORD:
@ -332,7 +334,7 @@ static int count_num_cache_index(regex_t* reg)
case OP_BACKREF_MULTI: case OP_BACKREF_MULTI:
case OP_BACKREF_MULTI_IC: case OP_BACKREF_MULTI_IC:
case OP_BACKREF_WITH_LEVEL: case OP_BACKREF_WITH_LEVEL:
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
case OP_MEMORY_START: case OP_MEMORY_START:
case OP_MEMORY_START_PUSH: case OP_MEMORY_START_PUSH:
@ -352,45 +354,56 @@ static int count_num_cache_index(regex_t* reg)
break; break;
case OP_PUSH: case OP_PUSH:
p += SIZE_RELADDR; p += SIZE_RELADDR;
num_cache_index++; num++;
*table_size += 1;
break; break;
case OP_POP: case OP_POP:
break; break;
case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_OR_JUMP_EXACT1:
case OP_PUSH_IF_PEEK_NEXT: case OP_PUSH_IF_PEEK_NEXT:
p += SIZE_RELADDR + 1; num_cache_index++; break; p += SIZE_RELADDR + 1; num++; *table_size += 1; break;
case OP_REPEAT: case OP_REPEAT:
case OP_REPEAT_NG: case OP_REPEAT_NG:
if (current_repeat != NO_OUTER_REPEAT) { if (current_mem != -1) {
// A nested OP_REPEAT is not yet supported. // A nested OP_REPEAT is not yet supported.
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
} }
GET_MEMNUM_INC(mem, p); GET_MEMNUM_INC(mem, p);
p += SIZE_RELADDR; p += SIZE_RELADDR;
if (reg->repeat_range[mem].lower == 0) { if (reg->repeat_range[mem].lower == 0) {
num_cache_index++; num++;
*table_size += 1;
} }
current_repeat = mem; reg->repeat_range[mem].base_num = num;
current_mem = mem;
current_mem_num = num;
break; break;
case OP_REPEAT_INC: case OP_REPEAT_INC:
case OP_REPEAT_INC_NG: case OP_REPEAT_INC_NG:
GET_MEMNUM_INC(mem, p); GET_MEMNUM_INC(mem, p);
if (mem != current_repeat) { //fprintf(stderr, "OP_REPEAT %d\n", mem);
if (mem != current_mem) {
// A lone or invalid OP_REPEAT_INC is found. // A lone or invalid OP_REPEAT_INC is found.
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
} }
{ {
int inner_num = num - current_mem_num;
OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
repeat_range->inner_num = inner_num;
num -= inner_num;
num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
//fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper);
if (repeat_range->lower < repeat_range->upper) { if (repeat_range->lower < repeat_range->upper) {
num_cache_index++; *table_size += 1;
} }
current_repeat = NO_OUTER_REPEAT; current_mem = -1;
current_mem_num = 0;
} }
break; break;
case OP_REPEAT_INC_SG: case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG: case OP_REPEAT_INC_NG_SG:
// TODO: Support nested OP_REPEAT. // TODO: Support nested OP_REPEAT.
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
case OP_NULL_CHECK_START: case OP_NULL_CHECK_START:
case OP_NULL_CHECK_END: case OP_NULL_CHECK_END:
case OP_NULL_CHECK_END_MEMST: case OP_NULL_CHECK_END_MEMST:
@ -409,21 +422,21 @@ static int count_num_cache_index(regex_t* reg)
case OP_PUSH_ABSENT_POS: case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END: case OP_ABSENT_END:
case OP_ABSENT: case OP_ABSENT:
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
case OP_CALL: case OP_CALL:
case OP_RETURN: case OP_RETURN:
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
case OP_CONDITION: case OP_CONDITION:
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH:
case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK_PUSH_OR_JUMP:
case OP_STATE_CHECK: case OP_STATE_CHECK:
case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_STAR:
case OP_STATE_CHECK_ANYCHAR_ML_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR:
return NUM_CACHE_INDEX_FAIL; return NUM_CACHE_OPCODE_FAIL;
case OP_SET_OPTION_PUSH: case OP_SET_OPTION_PUSH:
case OP_SET_OPTION: case OP_SET_OPTION:
@ -432,22 +445,21 @@ static int count_num_cache_index(regex_t* reg)
} }
} }
return num_cache_index; return num;
} }
static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
{ {
UChar* pbegin; UChar* pbegin;
UChar* p = reg->p; UChar* p = reg->p;
UChar* pend = p + reg->used; UChar* pend = p + reg->used;
LengthType len; LengthType len;
MemNumType mem; MemNumType mem;
MemNumType current_mem = -1;
int num = 0;
int current_mem_num = 0;
OnigEncoding enc = reg->enc; OnigEncoding enc = reg->enc;
int num_cache_point = 0;
MemNumType current_repeat = -1;
int current_repeat_base_point = 0;
while (p < pend) { while (p < pend) {
pbegin = p; pbegin = p;
switch (*p++) { switch (*p++) {
@ -500,20 +512,20 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index)
break; break;
case OP_ANYCHAR_STAR: case OP_ANYCHAR_STAR:
case OP_ANYCHAR_ML_STAR: case OP_ANYCHAR_ML_STAR:
cache_index->addr = pbegin; table->addr = pbegin;
cache_index->point = num_cache_point - current_repeat_base_point; table->num = num - current_mem_num;
cache_index->outer_repeat = current_repeat; table->outer_repeat = current_mem;
num_cache_point++; num++;
cache_index++; table++;
break; break;
case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_STAR_PEEK_NEXT:
case OP_ANYCHAR_ML_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
p++; p++;
cache_index->addr = pbegin; table->addr = pbegin;
cache_index->point = num_cache_point - current_repeat_base_point; table->num = num - current_mem_num;
cache_index->outer_repeat = current_repeat; table->outer_repeat = current_mem;
num_cache_point++; num++;
cache_index++; table++;
break; break;
case OP_WORD: case OP_WORD:
@ -547,7 +559,7 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index)
case OP_BACKREF_MULTI: case OP_BACKREF_MULTI:
case OP_BACKREF_MULTI_IC: case OP_BACKREF_MULTI_IC:
case OP_BACKREF_WITH_LEVEL: case OP_BACKREF_WITH_LEVEL:
return NUM_CACHE_INDEX_FAIL; return;
case OP_MEMORY_START: case OP_MEMORY_START:
case OP_MEMORY_START_PUSH: case OP_MEMORY_START_PUSH:
@ -567,61 +579,59 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index)
break; break;
case OP_PUSH: case OP_PUSH:
p += SIZE_RELADDR; p += SIZE_RELADDR;
cache_index->addr = pbegin; table->addr = pbegin;
cache_index->point = num_cache_point - current_repeat_base_point; table->num = num - current_mem_num;
cache_index->outer_repeat = current_repeat; table->outer_repeat = current_mem;
num_cache_point++; num++;
cache_index++; table++;
break; break;
case OP_POP: case OP_POP:
break; break;
case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_OR_JUMP_EXACT1:
case OP_PUSH_IF_PEEK_NEXT: case OP_PUSH_IF_PEEK_NEXT:
p += SIZE_RELADDR + 1; p += SIZE_RELADDR + 1;
cache_index->addr = pbegin; table->addr = pbegin;
cache_index->point = num_cache_point - current_repeat_base_point; table->num = num - current_mem_num;
cache_index->outer_repeat = current_repeat; table->outer_repeat = current_mem;
num_cache_point++; num++;
cache_index++; table++;
break; break;
case OP_REPEAT: case OP_REPEAT:
case OP_REPEAT_NG: case OP_REPEAT_NG:
GET_MEMNUM_INC(mem, p); GET_MEMNUM_INC(mem, p);
p += SIZE_RELADDR; p += SIZE_RELADDR;
if (reg->repeat_range[mem].lower == 0) { if (reg->repeat_range[mem].lower == 0) {
cache_index->addr = pbegin; table->addr = pbegin;
cache_index->point = num_cache_point - current_repeat_base_point; table->num = num - current_mem_num;
cache_index->outer_repeat = current_repeat; table->outer_repeat = mem;
num_cache_point++; num++;
cache_index++; table++;
} }
reg->repeat_range[mem].base_point = num_cache_point; current_mem = mem;
current_repeat = mem; current_mem_num = num;
current_repeat_base_point = num_cache_point;
break; break;
case OP_REPEAT_INC: case OP_REPEAT_INC:
case OP_REPEAT_INC_NG: case OP_REPEAT_INC_NG:
GET_MEMNUM_INC(mem, p); GET_MEMNUM_INC(mem, p);
{ {
int inner_point = num_cache_point - current_repeat_base_point; int inner_num = num - current_mem_num;
OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
if (repeat_range->lower < repeat_range->upper) { if (repeat_range->lower < repeat_range->upper) {
cache_index->addr = pbegin; table->addr = pbegin;
cache_index->point = num_cache_point - current_repeat_base_point; table->num = num - current_mem_num;
cache_index->outer_repeat = mem; table->outer_repeat = mem;
cache_index++; table++;
} }
repeat_range->inner_point = inner_point; num -= inner_num;
num_cache_point -= inner_point; num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
num_cache_point += inner_point * repeat_range->lower + (inner_point + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); current_mem = -1;
current_repeat = NO_OUTER_REPEAT; current_mem_num = 0;
current_repeat_base_point = 0;
} }
break; break;
case OP_REPEAT_INC_SG: case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG: case OP_REPEAT_INC_NG_SG:
// TODO: support OP_REPEAT opcodes. // TODO: support OP_REPEAT opcodes.
return NUM_CACHE_INDEX_FAIL; return;
case OP_NULL_CHECK_START: case OP_NULL_CHECK_START:
case OP_NULL_CHECK_END: case OP_NULL_CHECK_END:
case OP_NULL_CHECK_END_MEMST: case OP_NULL_CHECK_END_MEMST:
@ -640,21 +650,21 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index)
case OP_PUSH_ABSENT_POS: case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END: case OP_ABSENT_END:
case OP_ABSENT: case OP_ABSENT:
return NUM_CACHE_INDEX_FAIL; return;
case OP_CALL: case OP_CALL:
case OP_RETURN: case OP_RETURN:
return NUM_CACHE_INDEX_FAIL; return;
case OP_CONDITION: case OP_CONDITION:
return NUM_CACHE_INDEX_FAIL; return;
case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH:
case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK_PUSH_OR_JUMP:
case OP_STATE_CHECK: case OP_STATE_CHECK:
case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_STAR:
case OP_STATE_CHECK_ANYCHAR_ML_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR:
return NUM_CACHE_INDEX_FAIL; return;
case OP_SET_OPTION_PUSH: case OP_SET_OPTION_PUSH:
case OP_SET_OPTION: case OP_SET_OPTION:
@ -662,8 +672,6 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index)
break; break;
} }
} }
return num_cache_point;
} }
#endif /* USE_MATCH_CACHE */ #endif /* USE_MATCH_CACHE */
@ -851,16 +859,15 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\ #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\
(msa).enable_cache_opt = 0;\ (msa).enable_cache_match_opt = 0;\
(msa).num_fail = 0;\ (msa).num_fail = 0;\
(msa).num_cache_index = NUM_CACHE_INDEX_UNINIT;\ (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\
(msa).num_cache_point = 0;\ (msa).cache_index_table = (OnigCacheIndex *)0;\
(msa).cache_index = (OnigCacheIndex *)0;\ (msa).match_cache = (uint8_t *)0;\
(msa).cache = (uint8_t *)0;\
} while(0) } while(0)
#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\ #define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\
if ((msa).cache_index) xfree((msa).cache_index);\ if ((msa).cache_index_table) xfree((msa).cache_index_table);\
if ((msa).cache) xfree((msa).cache);\ if ((msa).match_cache) xfree((msa).match_cache);\
} while(0) } while(0)
#else #else
#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa)
@ -1153,9 +1160,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\ #define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\
if (enable) {\ if (enable) {\
int point = find_cache_index((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ int cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\
if (point >= 0) {\ if (cache_index >= 0) {\
int key = (num_cache_size) * (int)(pos) + point;\ int key = (num_cache_size) * (int)(pos) + cache_index;\
int index = key >> 3;\ int index = key >> 3;\
int mask = 1 << (key & 7);\ int mask = 1 << (key & 7);\
if ((match_cache)[index] & mask) {\ if ((match_cache)[index] & mask) {\
@ -1166,7 +1173,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
}\ }\
} while (0) } while (0)
static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p)
{ {
int l = 0, r = num_cache_table - 1, m; int l = 0, r = num_cache_table - 1, m;
OnigCacheIndex* item; OnigCacheIndex* item;
@ -1188,7 +1195,7 @@ static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *re
item = &table[m]; item = &table[m];
if (item->outer_repeat == -1) { if (item->outer_repeat == -1) {
return item->point; return item->num;
} }
range = &reg->repeat_range[item->outer_repeat]; range = &reg->repeat_range[item->outer_repeat];
@ -1197,21 +1204,21 @@ static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *re
count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count; count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
if (count < range->lower) { if (count < range->lower) {
return range->base_point + range->inner_point * count + item->point; return range->base_num + range->inner_num * count + item->num;
} }
if (range->upper == 0x7fffffff) { if (range->upper == 0x7fffffff) {
return range->base_point + range->inner_point * range->lower + (is_inc ? 0 : 1) + item->point; return range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item->num;
} }
return range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (count - range->lower) + item->point; return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num;
} }
static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) { static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) {
int l = 0, r = num_cache_table - 1, m1, m2; int l = 0, r = num_cache_table - 1, m1, m2;
int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG; int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG;
OnigCacheIndex *item1, *item2; OnigCacheIndex *item1, *item2;
int p1, p2; int k1, k2;
while (l <= r) { while (l <= r) {
m1 = (l + r) / 2; m1 = (l + r) / 2;
@ -1234,32 +1241,32 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos,
item1 = &table[m1]; item1 = &table[m1];
item2 = &table[m2]; item2 = &table[m2];
if (item1->outer_repeat < 0) p1 = item1->point; if (item1->outer_repeat < 0) k1 = item1->num;
else p1 = reg->repeat_range[item1->outer_repeat].base_point + item1->point; else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num;
if (item2->outer_repeat < 0) p2 = item2->point; if (item2->outer_repeat < 0) k2 = item2->num;
else { else {
OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat]; OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat];
if (range->upper == 0x7fffffff) p2 = range->base_point + range->inner_point * range->lower + (is_inc ? 0 : 1) + item2->point; if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num;
else p2 = range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->point; else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num;
} }
int base = pos * num_cache_size; int base = pos * num_cache_size;
p1 += base; k1 += base;
p2 += base; k2 += base;
if ((p1 >> 3) == (p2 >> 3)) { if ((k1 >> 3) == (k2 >> 3)) {
match_cache[p1 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)) | ((1 << (p1 & 7)) - 1); match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1);
} else { } else {
int i = p1 >> 3; int i = k1 >> 3;
if (p1 & 7) { if (k1 & 7) {
match_cache[p1 >> 3] &= (1 << ((p1 & 7) - 1)) - 1; match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1;
i++; i++;
} }
if (i < (p2 >> 3)) { if (i < (k2 >> 3)) {
xmemset(&match_cache[i], 0, (p2 >> 3) - i); xmemset(&match_cache[i], 0, (k2 >> 3) - i);
if (p2 & 7) { if (k2 & 7) {
match_cache[p2 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)); match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1));
} }
} }
} }
@ -2754,7 +2761,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
STACK_PUSH_ALT(p, s, sprev, pkeep); STACK_PUSH_ALT(p, s, sprev, pkeep);
n = enclen(encode, s, end); n = enclen(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
@ -2767,7 +2774,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
STACK_PUSH_ALT(p, s, sprev, pkeep); STACK_PUSH_ALT(p, s, sprev, pkeep);
n = enclen(encode, s, end); n = enclen(encode, s, end);
if (n > 1) { if (n > 1) {
@ -2786,7 +2793,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
if (*p == *s) { if (*p == *s) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
STACK_PUSH_ALT(p + 1, s, sprev, pkeep); STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
} }
n = enclen(encode, s, end); n = enclen(encode, s, end);
@ -2802,7 +2809,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
if (*p == *s) { if (*p == *s) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
STACK_PUSH_ALT(p + 1, s, sprev, pkeep); STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
} }
n = enclen(encode, s, end); n = enclen(encode, s, end);
@ -3389,7 +3396,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
{ {
int isnull; int isnull;
int ischanged = 0; // set 1 when a loop is null but memory status is changed. int ischanged = 0; // set 1 when a loop is empty but memory status is changed.
GET_MEMNUM_INC(mem, p); /* mem: null check id */ GET_MEMNUM_INC(mem, p); /* mem: null check id */
STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg); STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg);
@ -3402,7 +3409,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto null_check_found; goto null_check_found;
} }
# ifdef USE_CACHE_MATCH_OPT # ifdef USE_CACHE_MATCH_OPT
if (ischanged && msa->enable_cache_opt) { if (ischanged && msa->enable_cache_match_opt) {
RelAddrType rel; RelAddrType rel;
OnigUChar *addr; OnigUChar *addr;
int mem; int mem;
@ -3421,7 +3428,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
default: default:
goto unexpected_bytecode_error; goto unexpected_bytecode_error;
} }
reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->cache, msa->cache_index, msa->num_cache_index ,msa->num_cache_point); reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode);
} }
# endif # endif
} }
@ -3466,7 +3473,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_PUSH) MOP_IN(OP_PUSH); CASE(OP_PUSH) MOP_IN(OP_PUSH);
GET_RELADDR_INC(addr, p); GET_RELADDR_INC(addr, p);
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
STACK_PUSH_ALT(p + addr, s, sprev, pkeep); STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
MOP_OUT; MOP_OUT;
JUMP; JUMP;
@ -3520,7 +3527,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_RELADDR_INC(addr, p); GET_RELADDR_INC(addr, p);
if (*p == *s && DATA_ENSURE_CHECK1) { if (*p == *s && DATA_ENSURE_CHECK1) {
p++; p++;
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
STACK_PUSH_ALT(p + addr, s, sprev, pkeep); STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
MOP_OUT; MOP_OUT;
JUMP; JUMP;
@ -3534,7 +3541,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_RELADDR_INC(addr, p); GET_RELADDR_INC(addr, p);
if (*p == *s) { if (*p == *s) {
p++; p++;
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
STACK_PUSH_ALT(p + addr, s, sprev, pkeep); STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
MOP_OUT; MOP_OUT;
JUMP; JUMP;
@ -3553,7 +3560,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT(mem, p); STACK_PUSH_REPEAT(mem, p);
if (reg->repeat_range[mem].lower == 0) { if (reg->repeat_range[mem].lower == 0) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
STACK_PUSH_ALT(p + addr, s, sprev, pkeep); STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
} }
} }
@ -3570,7 +3577,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT(mem, p); STACK_PUSH_REPEAT(mem, p);
if (reg->repeat_range[mem].lower == 0) { if (reg->repeat_range[mem].lower == 0) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
STACK_PUSH_ALT(p, s, sprev, pkeep); STACK_PUSH_ALT(p, s, sprev, pkeep);
p += addr; p += addr;
} }
@ -3590,7 +3597,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
} }
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
if (*pbegin == OP_REPEAT_INC) { if (*pbegin == OP_REPEAT_INC) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
} }
STACK_PUSH_ALT(p, s, sprev, pkeep); STACK_PUSH_ALT(p, s, sprev, pkeep);
p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
@ -3623,7 +3630,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT_INC(si); STACK_PUSH_REPEAT_INC(si);
if (*pbegin == OP_REPEAT_INC_NG) { if (*pbegin == OP_REPEAT_INC_NG) {
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
} }
STACK_PUSH_ALT(pcode, s, sprev, pkeep); STACK_PUSH_ALT(pcode, s, sprev, pkeep);
} }
@ -3813,33 +3820,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
pkeep = stk->u.state.pkeep; pkeep = stk->u.state.pkeep;
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
msa->enable_cache_opt = 1; int table_size = 0;
if (msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { msa->enable_cache_match_opt = 1;
msa->num_cache_index = count_num_cache_index(reg); if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
msa->num_cache_opcode = count_num_cache_opcode(reg, &table_size);
} }
if (msa->num_cache_index == NUM_CACHE_INDEX_FAIL || msa->num_cache_index == 0) { if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) {
msa->enable_cache_opt = 0; msa->enable_cache_match_opt = 0;
goto fail_match_cache_opt; goto fail_match_cache_opt;
} }
if (msa->cache_index == NULL) { if (msa->cache_index_table == NULL) {
OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_index * sizeof(OnigCacheIndex)); OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(table_size * sizeof(OnigCacheIndex));
if (table == NULL) { if (table == NULL) {
msa->enable_cache_opt = 0; msa->enable_cache_match_opt = 0;
goto fail_match_cache_opt; goto fail_match_cache_opt;
} }
msa->num_cache_point = init_cache_index(reg, table); init_cache_index_table(reg, table);
msa->cache_index = table; msa->cache_index_table = table;
msa->num_cache_table = table_size;
} }
// TODO: check arithemetic overflow. // TODO: check arithemetic overflow.
int cache_size8 = msa->num_cache_point * ((int)(end - str) + 1); int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1);
int cache_size = (cache_size8 >> 3) + (cache_size8 & 7 ? 1 : 0); int match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0);
msa->cache = (uint8_t*)xmalloc(cache_size * sizeof(uint8_t)); msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t));
if (msa->cache == NULL) { if (msa->match_cache == NULL) {
msa->enable_cache_opt = 0; msa->enable_cache_match_opt = 0;
goto fail_match_cache_opt; goto fail_match_cache_opt;
} }
xmemset(msa->cache, 0, cache_size * sizeof(uint8_t)); xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t));
} }
fail_match_cache_opt: fail_match_cache_opt:
#endif #endif

View File

@ -45,8 +45,8 @@
#define USE_CACHE_MATCH_OPT #define USE_CACHE_MATCH_OPT
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
# define NUM_CACHE_INDEX_FAIL -1 # define NUM_CACHE_OPCODE_FAIL -1
# define NUM_CACHE_INDEX_UNINIT -2 # define NUM_CACHE_OPCODE_UNINIT -2
#endif #endif
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
@ -874,11 +874,10 @@ typedef struct _OnigStackType {
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
typedef struct { typedef struct {
UChar* addr; /* pointer to corresponding opcode. */ UChar *addr;
int point; /* cache point number (in outer repeat if `outer_repeat != -1`) */ int num;
int outer_repeat; /* outer repeat index number */ int outer_repeat;
} OnigCacheIndex; } OnigCacheIndex;
#define NO_OUTER_REPEAT -1
#endif #endif
typedef struct { typedef struct {
@ -904,12 +903,12 @@ typedef struct {
uint64_t end_time; uint64_t end_time;
#endif #endif
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT
int num_fail; /* counter of failure (backtrack) number for switching cache optimization. */ int num_fail;
int num_cache_point; /* number of cache point in program */ int enable_cache_match_opt;
int num_cache_index; /* size of cache index array */ int num_cache_opcode;
int enable_cache_opt; /* whether cache optimization is enabled */ int num_cache_table;
OnigCacheIndex* cache_index; /* cache index array for computing cache point number */ OnigCacheIndex *cache_index_table;
uint8_t* cache; /* bit array for cache optimization */ uint8_t *match_cache;
#endif #endif
} OnigMatchArg; } OnigMatchArg;