Use end of char boundary in start_with?
Previously we used the next character following the found prefix to determine whether the match ended in the middle of a broken character. This caused surprising behaviour when a valid character was followed by a UTF-8 continuation byte. This commit changes the behaviour to instead look for the end of the last character in the prefix. [Bug #19784] Co-authored-by: ywenc <ywenc@github.com> Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
parent
2ca0f01015
commit
d89b15cdce
Notes:
git
2023-09-01 23:23:49 +00:00
@ -125,6 +125,15 @@ at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
|
|||||||
return rb_enc_left_char_head(s, p, e, enc) == p;
|
return rb_enc_left_char_head(s, p, e, enc) == p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Reports whether rb_enc_right_char_head(s, p, e, enc) returns p itself,
 * i.e. whether that adjustment leaves p where it is.
 *
 * Callers must pass s <= p <= e (both asserted below); the callers shown
 * in this diff pass the start of the string as s and its end as e.
 *
 * Per the accompanying commit message, this is used to check that a
 * matched prefix ends at the end of its last character, instead of
 * inspecting the character that follows the prefix.
 */
static inline bool
|
||||||
|
at_char_right_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
|
||||||
|
{
|
||||||
|
RUBY_ASSERT(s <= p);
|
||||||
|
RUBY_ASSERT(p <= e);
|
||||||
|
|
||||||
|
return rb_enc_right_char_head(s, p, e, enc) == p;
|
||||||
|
}
|
||||||
|
|
||||||
/* expect tail call optimization */
|
/* expect tail call optimization */
|
||||||
// YJIT needs this function to never allocate and never raise
|
// YJIT needs this function to never allocate and never raise
|
||||||
static inline VALUE
|
static inline VALUE
|
||||||
|
4
string.c
4
string.c
@ -10472,7 +10472,7 @@ rb_str_start_with(int argc, VALUE *argv, VALUE str)
|
|||||||
p = RSTRING_PTR(str);
|
p = RSTRING_PTR(str);
|
||||||
e = p + slen;
|
e = p + slen;
|
||||||
s = p + tlen;
|
s = p + tlen;
|
||||||
if (!at_char_boundary(p, s, e, enc))
|
if (!at_char_right_boundary(p, s, e, enc))
|
||||||
continue;
|
continue;
|
||||||
if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
|
if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
|
||||||
return Qtrue;
|
return Qtrue;
|
||||||
@ -10554,7 +10554,7 @@ deleted_prefix_length(VALUE str, VALUE prefix)
|
|||||||
}
|
}
|
||||||
const char *strend = strptr + olen;
|
const char *strend = strptr + olen;
|
||||||
const char *after_prefix = strptr + prefixlen;
|
const char *after_prefix = strptr + prefixlen;
|
||||||
if (!at_char_boundary(strptr, after_prefix, strend, enc)) {
|
if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
|
||||||
/* prefix does not end at char-boundary */
|
/* prefix does not end at char-boundary */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user