From 457c2c4a55b5896b30510ff8b39256b61b46cdf4 Mon Sep 17 00:00:00 2001 From: mame Date: Wed, 17 Feb 2010 12:36:41 +0000 Subject: [PATCH] * regcomp.c (setup_tree, onig_compile): optimize .* at last by converting into (?>.*), which does not backtrack. [ruby-core:27791] * test/ruby/test_regexp.rb: add a test for above. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26701 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++++++ regcomp.c | 20 ++++++++++++++++++-- test/ruby/test_regexp.rb | 7 +++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d7efba55dd..932976b16a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Wed Feb 17 21:34:01 2010 Yusuke Endoh + + * regcomp.c (setup_tree, onig_compile): optimize .* at last by + converting into (?>.*), which does not backtrack. [ruby-core:27791] + + * test/ruby/test_regexp.rb: add a test for above. + Wed Feb 17 21:26:53 2010 Tanaka Akira * bootstraptest/runner.rb (assert_normal_exit): add :timeout option. diff --git a/regcomp.c b/regcomp.c index e1ac7ee85c..16c816b1f7 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3643,6 +3643,7 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) #define IN_NOT (1<<1) #define IN_REPEAT (1<<2) #define IN_VAR_REPEAT (1<<3) +#define IN_LAST (1<<4) /* setup_tree does the following work. 1. check empty loop. (set qn->target_empty_info) @@ -3664,7 +3665,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) { Node* prev = NULL_NODE; do { - r = setup_tree(NCAR(node), reg, state, env); + int s = IS_NOT_NULL(NCDR(node)) ? (state & ~IN_LAST) : state; + r = setup_tree(NCAR(node), reg, s, env); if (IS_NOT_NULL(prev) && r == 0) { r = next_setup(prev, NCAR(node), reg); } @@ -3795,6 +3797,20 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } } #endif + + if ((state & IN_LAST) != 0 && qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { + /* automatic posseivation a* (at last) ==> (?>a*) */ + if (qn->lower <= 1) { + int ttype = NTYPE(qn->target); + if (IS_NODE_TYPE_SIMPLE(ttype)) { + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + swap_node(node, en); + NENCLOSE(node)->target = en; + } + } + } } break; @@ -5423,7 +5439,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->num_call = 0; #endif - r = setup_tree(root, reg, 0, &scan_env); + r = setup_tree(root, reg, IN_LAST, &scan_env); if (r != 0) goto err_unset; #ifdef ONIG_DEBUG_PARSE_TREE diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 4cd35d5248..06c6d1c633 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -800,4 +800,11 @@ class TestRegexp < Test::Unit::TestCase assert_nothing_raised { eval("a = 1; /\#{ a }/; a") } assert_nothing_raised { eval("a = 1; /\#{ a }/o; a") } end + + def test_optimize_last_anycharstar + s = "1" + " " * 5000000 + assert_nothing_raised { s.match(/(\d) (.*)/) } + assert_equal("1", $1) + assert_equal(" " * 4999999, $2) + end end