Refactor Regexp#match cache implementation (#7724)

* Refactor Regexp#match cache implementation

Improved variable and function names
Fixed [Bug #19537] (possibly already fixed by https://github.com/ruby/ruby/pull/7694)

* Add a glossary comment for "match cache"

* Skip resetting the match cache on null check when there are no cache points
This commit is contained in:
TSUYUSATO Kitsune 2023-04-19 13:08:28 +09:00 committed by GitHub
parent 8023da746c
commit a1c2c274ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
Notes: git 2023-04-19 04:09:07 +00:00
Merged-By: makenowjust <make.just.on@gmail.com>
4 changed files with 437 additions and 334 deletions

View File

@ -744,8 +744,6 @@ typedef struct {
typedef struct {
int lower;
int upper;
long base_num;
long inner_num;
} OnigRepeatRange;
typedef void (*OnigWarnFunc)(const char* s);

719
regexec.c

File diff suppressed because it is too large Load Diff

View File

@ -35,19 +35,15 @@
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DEBUG_MATCH_CACHE */
/* #define ONIG_DEBUG_MEMLEAK */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
/* #define ONIG_DEBUG_STATISTICS */
/* enable matching optimization by using cache. */
#define USE_CACHE_MATCH_OPT
#ifdef USE_CACHE_MATCH_OPT
# define NUM_CACHE_OPCODE_FAIL -1
# define NUM_CACHE_OPCODE_UNINIT -2
#endif
/* enable the match optimization by using a cache. */
#define USE_MATCH_CACHE
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
@ -880,12 +876,14 @@ typedef struct _OnigStackType {
} u;
} OnigStackType;
#ifdef USE_CACHE_MATCH_OPT
#ifdef USE_MATCH_CACHE
typedef struct {
UChar *addr;
long num;
int outer_repeat;
} OnigCacheIndex;
long cache_point;
int outer_repeat_mem;
long num_cache_points_at_outer_repeat;
long num_cache_points_in_outer_repeat;
} OnigCacheOpcode;
#endif
typedef struct {
@ -910,16 +908,18 @@ typedef struct {
#else
uint64_t end_time;
#endif
#ifdef USE_CACHE_MATCH_OPT
long num_fail;
int enable_cache_match_opt;
long num_cache_opcode;
long num_cache_table;
OnigCacheIndex* cache_index_table;
uint8_t* match_cache;
#ifdef USE_MATCH_CACHE
int enable_match_cache;
long num_fails;
long num_cache_opcodes;
OnigCacheOpcode* cache_opcodes;
long num_cache_points;
uint8_t* match_cache_buf;
#endif
} OnigMatchArg;
#define NUM_CACHE_OPCODES_IMPOSSIBLE -1
#define NUM_CACHE_OPCODES_UNINIT -2
#define IS_CODE_SB_WORD(enc,code) \
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))

View File

@ -1733,7 +1733,7 @@ class TestRegexp < Test::Unit::TestCase
end;
end
def test_cache_optimization_exponential
def test_match_cache_exponential
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }
begin;
@ -1743,7 +1743,7 @@ class TestRegexp < Test::Unit::TestCase
end;
end
def test_cache_optimization_square
def test_match_cache_square
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }
begin;
@ -1753,7 +1753,7 @@ class TestRegexp < Test::Unit::TestCase
end;
end
def test_cache_index_initialize
def test_cache_opcodes_initialize
str = 'test1-test2-test3-test4-test_5'
re = '^([0-9a-zA-Z\-/]*){1,256}$'
100.times do
@ -1781,6 +1781,14 @@ class TestRegexp < Test::Unit::TestCase
assert_equal("123456789".match(/(?:x?\dx?){2,}/)[0], "123456789")
end
# Regression test for [Bug 19537]: '^([ab]{1,3})(a?)*$' must not match 'aac'.
def test_bug_19537
str = 'aac'
re = '^([ab]{1,3})(a?)*$'
# A fresh Regexp is compiled from the pattern string on each of the 100
# iterations, and every one of them must report no match.
100.times do
assert !Regexp.new(re).match?(str)
end
end
def test_linear_time_p
assert_send [Regexp, :linear_time?, /a/]
assert_send [Regexp, :linear_time?, 'a']