[ruby/strscan] Fix a bug where scan_until behaves differently with Regexp and String patterns
(https://github.com/ruby/strscan/pull/138)

Fix https://github.com/ruby/strscan/pull/131

https://github.com/ruby/strscan/commit/e1cec2e726
This commit is contained in:
NAITOH Jun 2025-02-16 10:48:07 +09:00 committed by Hiroshi SHIBATA
parent 6b3a97d74b
commit eee9bd1aa4
2 changed files with 68 additions and 12 deletions

View File

@ -571,19 +571,20 @@ match_target(struct strscanner *p)
}
/*
 * Record a single match span in register 0 of the scanner's OnigRegion
 * (p->regs), after clearing any previously stored registers.
 *
 * NOTE(review): this listing appears to be a rendered diff with the +/-
 * markers stripped, so the old (p, length) and new (p, pos, length)
 * variants of several lines both appear; in each pair the later line
 * looks like the post-change version -- confirm against the real file.
 *
 *   pos    - offset of the match start; the visible callers pass 0 for a
 *            head-anchored String match and the search result otherwise.
 *            Presumably a byte offset from the current scan position --
 *            TODO confirm.
 *   length - length of the matched pattern (callers pass
 *            RSTRING_LEN(pattern)).
 *
 * When p->fixed_anchor_p is set, p->curr is added to both ends of the
 * span; otherwise the span is stored as pos .. pos + length.
 * Returns early, with the region left cleared, if onig_region_set()
 * reports a non-zero result.
 */
static inline void
set_registers(struct strscanner *p, size_t length)
set_registers(struct strscanner *p, size_t pos, size_t length)
{
const int at = 0;
OnigRegion *regs = &(p->regs);
onig_region_clear(regs);
if (onig_region_set(regs, at, 0, 0)) return;
if (p->fixed_anchor_p) {
regs->beg[at] = p->curr;
regs->end[at] = p->curr + length;
regs->beg[at] = pos + p->curr;
regs->end[at] = pos + p->curr + length;
}
else
{
regs->end[at] = length;
regs->beg[at] = pos;
regs->end[at] = pos + length;
}
}
@ -731,7 +732,7 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
return Qnil;
}
set_registers(p, RSTRING_LEN(pattern));
set_registers(p, 0, RSTRING_LEN(pattern));
}
else {
rb_encoding *enc = rb_enc_check(p->str, pattern);
@ -740,7 +741,7 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
if (pos == -1) {
return Qnil;
}
set_registers(p, RSTRING_LEN(pattern) + pos);
set_registers(p, pos, RSTRING_LEN(pattern));
}
}

View File

@ -409,12 +409,8 @@ module StringScannerTests
s = create_string_scanner('stra strb strc')
s.scan(/\w+/)
assert_equal('stra', s.matched)
s.scan(/\s+/)
assert_equal(' ', s.matched)
s.scan('st')
assert_equal('st', s.matched)
s.scan(/\w+/)
assert_equal('rb', s.matched)
s.scan_until(/\w+/)
assert_equal('strb', s.matched)
s.scan(/\s+/)
assert_equal(' ', s.matched)
s.scan(/\w+/)
@ -432,6 +428,23 @@ module StringScannerTests
assert_equal('t', s.matched)
end
# Checks StringScanner#matched after scans driven by String (not Regexp)
# patterns, including scan_until, and after operations that leave no match.
def test_matched_string
  omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
  scanner = create_string_scanner('stra strb strc')

  # Each step: scanner method, String pattern, expected #matched afterwards.
  successful_steps = [
    [:scan, 'stra', 'stra'],
    [:scan_until, 'strb', 'strb'],
    [:scan, ' ', ' '],
    [:scan, 'strc', 'strc'],
  ]
  successful_steps.each do |operation, pattern, expected|
    scanner.public_send(operation, pattern)
    assert_equal(expected, scanner.matched)
  end

  # The whole input has been consumed by now, so neither a further scan
  # nor getch is expected to leave a match behind.
  scanner.scan('c')
  assert_nil(scanner.matched)
  scanner.getch
  assert_nil(scanner.matched)
end
def test_AREF
s = create_string_scanner('stra strb strc')
@ -522,6 +535,27 @@ module StringScannerTests
assert_nil(s.pre_match)
end
# Checks StringScanner#pre_match after String-pattern scans (scan, skip,
# scan_until), after single-character/byte reads, and after a failed scan.
def test_pre_match_string
  omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
  scanner = create_string_scanner('a b c d e')

  # Each step: [method, *arguments] to send, then the expected #pre_match.
  steps = [
    [[:scan, 'a'], ''],
    [[:skip, ' '], 'a'],
    [[:scan, 'b'], 'a '],
    [[:scan_until, 'c'], 'a b '],
    [[:getch], 'a b c'],
    [[:get_byte], 'a b c '],
    [[:get_byte], 'a b c d'],
  ]
  steps.each do |call, expected|
    scanner.public_send(*call)
    assert_equal(expected, scanner.pre_match)
  end

  # A pattern that cannot match must reset pre_match to nil.
  scanner.scan('never match')
  assert_nil(scanner.pre_match)
end
def test_post_match
s = create_string_scanner('a b c d e')
s.scan(/\w/)
@ -546,6 +580,27 @@ module StringScannerTests
assert_nil(s.post_match)
end
# Checks StringScanner#post_match after String-pattern scans (scan, skip,
# scan_until), after single-character/byte reads, and after a failed scan.
def test_post_match_string
  omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
  scanner = create_string_scanner('a b c d e')

  # Each step: [method, *arguments] to send, then the expected #post_match.
  steps = [
    [[:scan, 'a'], ' b c d e'],
    [[:skip, ' '], 'b c d e'],
    [[:scan, 'b'], ' c d e'],
    [[:scan_until, 'c'], ' d e'],
    [[:getch], 'd e'],
    [[:get_byte], ' e'],
    [[:get_byte], 'e'],
  ]
  steps.each do |call, expected|
    scanner.public_send(*call)
    assert_equal(expected, scanner.post_match)
  end

  # A pattern that cannot match must reset post_match to nil.
  scanner.scan('never match')
  assert_nil(scanner.post_match)
end
def test_terminate
s = create_string_scanner('ssss')
s.getch