From eee9bd1aa44052af1a283051c7f767248ea9d54b Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sun, 16 Feb 2025 10:48:07 +0900 Subject: [PATCH] [ruby/strscan] Fix a bug that scan_until behaves differently with Regexp and String patterns (https://github.com/ruby/strscan/pull/138) Fix https://github.com/ruby/strscan/pull/131 https://github.com/ruby/strscan/commit/e1cec2e726 --- ext/strscan/strscan.c | 13 +++--- test/strscan/test_stringscanner.rb | 67 +++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index c327d0c98b..012d3a4c42 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -571,19 +571,20 @@ match_target(struct strscanner *p) } static inline void -set_registers(struct strscanner *p, size_t length) +set_registers(struct strscanner *p, size_t pos, size_t length) { const int at = 0; OnigRegion *regs = &(p->regs); onig_region_clear(regs); if (onig_region_set(regs, at, 0, 0)) return; if (p->fixed_anchor_p) { - regs->beg[at] = p->curr; - regs->end[at] = p->curr + length; + regs->beg[at] = pos + p->curr; + regs->end[at] = pos + p->curr + length; } else { - regs->end[at] = length; + regs->beg[at] = pos; + regs->end[at] = pos + length; } } @@ -731,7 +732,7 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) { return Qnil; } - set_registers(p, RSTRING_LEN(pattern)); + set_registers(p, 0, RSTRING_LEN(pattern)); } else { rb_encoding *enc = rb_enc_check(p->str, pattern); @@ -740,7 +741,7 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly if (pos == -1) { return Qnil; } - set_registers(p, RSTRING_LEN(pattern) + pos); + set_registers(p, pos, RSTRING_LEN(pattern)); } } diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb index 1c2fb57711..eb35dfa119 100644 --- a/test/strscan/test_stringscanner.rb +++ b/test/strscan/test_stringscanner.rb @@ -409,12 +409,8 @@ module StringScannerTests s = create_string_scanner('stra strb strc') s.scan(/\w+/) assert_equal('stra', s.matched) - s.scan(/\s+/) - assert_equal(' ', s.matched) - s.scan('st') - assert_equal('st', s.matched) - s.scan(/\w+/) - assert_equal('rb', s.matched) + s.scan_until(/\w+/) + assert_equal('strb', s.matched) s.scan(/\s+/) assert_equal(' ', s.matched) s.scan(/\w+/) @@ -432,6 +428,23 @@ module StringScannerTests assert_equal('t', s.matched) end + def test_matched_string + omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby" + s = create_string_scanner('stra strb strc') + s.scan('stra') + assert_equal('stra', s.matched) + s.scan_until('strb') + assert_equal('strb', s.matched) + s.scan(' ') + assert_equal(' ', s.matched) + s.scan('strc') + assert_equal('strc', s.matched) + s.scan('c') + assert_nil(s.matched) + s.getch + assert_nil(s.matched) + end + def test_AREF s = create_string_scanner('stra strb strc') @@ -522,6 +535,27 @@ module StringScannerTests assert_nil(s.pre_match) end + def test_pre_match_string + omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby" + s = create_string_scanner('a b c d e') + s.scan('a') + assert_equal('', s.pre_match) + s.skip(' ') + assert_equal('a', s.pre_match) + s.scan('b') + assert_equal('a ', s.pre_match) + s.scan_until('c') + assert_equal('a b ', s.pre_match) + s.getch + assert_equal('a b c', s.pre_match) + s.get_byte + assert_equal('a b c ', s.pre_match) + s.get_byte + assert_equal('a b c d', s.pre_match) + s.scan('never match') + assert_nil(s.pre_match) + end + def test_post_match s = create_string_scanner('a b c d e') s.scan(/\w/) @@ -546,6 +580,27 @@ module StringScannerTests assert_nil(s.post_match) end + def test_post_match_string + omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby" + s = create_string_scanner('a b c d e') + s.scan('a') + assert_equal(' b c d e', s.post_match) + s.skip(' ') + assert_equal('b c d e', s.post_match) + s.scan('b') + assert_equal(' c d e', s.post_match) + s.scan_until('c') + assert_equal(' d e', s.post_match) + s.getch + assert_equal('d e', s.post_match) + s.get_byte + assert_equal(' e', s.post_match) + s.get_byte + assert_equal('e', s.post_match) + s.scan('never match') + assert_nil(s.post_match) + end + def test_terminate s = create_string_scanner('ssss') s.getch