Refactor Regexp#match cache implementation (#7724)

* Refactor Regexp#match cache implementation

Improved variable and function names
Fixed [Bug 19537] (Maybe fixed in https://github.com/ruby/ruby/pull/7694)

* Add a glossary comment for "match cache"

* Skip resetting the match cache on null check when there is no cache point
This commit is contained in:
TSUYUSATO Kitsune 2023-04-19 13:08:28 +09:00 committed by GitHub
parent 8023da746c
commit a1c2c274ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
Notes: git 2023-04-19 04:09:07 +00:00
Merged-By: makenowjust <make.just.on@gmail.com>
4 changed files with 437 additions and 334 deletions

View File

@ -744,8 +744,6 @@ typedef struct {
typedef struct { typedef struct {
int lower; int lower;
int upper; int upper;
long base_num;
long inner_num;
} OnigRepeatRange; } OnigRepeatRange;
typedef void (*OnigWarnFunc)(const char* s); typedef void (*OnigWarnFunc)(const char* s);

719
regexec.c

File diff suppressed because it is too large Load Diff

View File

@ -35,19 +35,15 @@
/* #define ONIG_DEBUG_COMPILE */ /* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */ /* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */ /* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DEBUG_MATCH_CACHE */
/* #define ONIG_DEBUG_MEMLEAK */ /* #define ONIG_DEBUG_MEMLEAK */
/* #define ONIG_DONT_OPTIMIZE */ /* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */ /* for byte-code statistical data. */
/* #define ONIG_DEBUG_STATISTICS */ /* #define ONIG_DEBUG_STATISTICS */
/* enable matching optimization by using cache. */ /* enable the match optimization by using a cache. */
#define USE_CACHE_MATCH_OPT #define USE_MATCH_CACHE
#ifdef USE_CACHE_MATCH_OPT
# define NUM_CACHE_OPCODE_FAIL -1
# define NUM_CACHE_OPCODE_UNINIT -2
#endif
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
@ -880,12 +876,14 @@ typedef struct _OnigStackType {
} u; } u;
} OnigStackType; } OnigStackType;
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_MATCH_CACHE
typedef struct { typedef struct {
UChar *addr; UChar *addr;
long num; long cache_point;
int outer_repeat; int outer_repeat_mem;
} OnigCacheIndex; long num_cache_points_at_outer_repeat;
long num_cache_points_in_outer_repeat;
} OnigCacheOpcode;
#endif #endif
typedef struct { typedef struct {
@ -910,16 +908,18 @@ typedef struct {
#else #else
uint64_t end_time; uint64_t end_time;
#endif #endif
#ifdef USE_CACHE_MATCH_OPT #ifdef USE_MATCH_CACHE
long num_fail; int enable_match_cache;
int enable_cache_match_opt; long num_fails;
long num_cache_opcode; long num_cache_opcodes;
long num_cache_table; OnigCacheOpcode* cache_opcodes;
OnigCacheIndex* cache_index_table; long num_cache_points;
uint8_t* match_cache; uint8_t* match_cache_buf;
#endif #endif
} OnigMatchArg; } OnigMatchArg;
#define NUM_CACHE_OPCODES_IMPOSSIBLE -1
#define NUM_CACHE_OPCODES_UNINIT -2
#define IS_CODE_SB_WORD(enc,code) \ #define IS_CODE_SB_WORD(enc,code) \
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))

View File

@ -1733,7 +1733,7 @@ class TestRegexp < Test::Unit::TestCase
end; end;
end end
def test_cache_optimization_exponential def test_match_cache_exponential
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect } timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }
begin; begin;
@ -1743,7 +1743,7 @@ class TestRegexp < Test::Unit::TestCase
end; end;
end end
def test_cache_optimization_square def test_match_cache_square
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect } timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }
begin; begin;
@ -1753,7 +1753,7 @@ class TestRegexp < Test::Unit::TestCase
end; end;
end end
def test_cache_index_initialize def test_cache_opcodes_initialize
str = 'test1-test2-test3-test4-test_5' str = 'test1-test2-test3-test4-test_5'
re = '^([0-9a-zA-Z\-/]*){1,256}$' re = '^([0-9a-zA-Z\-/]*){1,256}$'
100.times do 100.times do
@ -1781,6 +1781,14 @@ class TestRegexp < Test::Unit::TestCase
assert_equal("123456789".match(/(?:x?\dx?){2,}/)[0], "123456789") assert_equal("123456789".match(/(?:x?\dx?){2,}/)[0], "123456789")
end end
def test_bug_19537
str = 'aac'
re = '^([ab]{1,3})(a?)*$'
100.times do
assert !Regexp.new(re).match?(str)
end
end
def test_linear_time_p def test_linear_time_p
assert_send [Regexp, :linear_time?, /a/] assert_send [Regexp, :linear_time?, /a/]
assert_send [Regexp, :linear_time?, 'a'] assert_send [Regexp, :linear_time?, 'a']