Introduce at_char_boundary function

This commit is contained in:
Nobuyoshi Nakada 2023-08-24 18:32:46 +09:00
parent cfdbbd6726
commit 00ac3a64ba
Notes: git 2023-08-25 23:58:23 +00:00
4 changed files with 12 additions and 8 deletions

2
file.c
View File

@ -4610,7 +4610,7 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc)
if (l1 < l2) return l1; if (l1 < l2) return l1;
s = p+l1-l2; s = p+l1-l2;
if (rb_enc_left_char_head(p, s, p+l1, enc) != s) return 0; if (!at_char_boundary(p, s, p+l1, enc)) return 0;
#if CASEFOLD_FILESYSTEM #if CASEFOLD_FILESYSTEM
#define fncomp strncasecmp #define fncomp strncasecmp
#else #else

View File

@ -119,6 +119,12 @@ is_broken_string(VALUE str)
return rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN; return rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN;
} }
/*
 * Report whether the byte position `p` sits exactly at the start of a
 * character in the buffer [s, e) under encoding `enc`.
 *
 * The check asks rb_enc_left_char_head() for the character head at or
 * before `p`; if that head is `p` itself the position is a boundary,
 * otherwise `p` points into the middle of a multibyte character.
 */
static inline bool
at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
{
    const char *head = rb_enc_left_char_head(s, p, e, enc);

    return head == p;
}
/* expect tail call optimization */ /* expect tail call optimization */
// YJIT needs this function to never allocate and never raise // YJIT needs this function to never allocate and never raise
static inline VALUE static inline VALUE

3
io.c
View File

@ -4144,8 +4144,7 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr)
s = RSTRING_PTR(str); s = RSTRING_PTR(str);
e = RSTRING_END(str); e = RSTRING_END(str);
p = e - rslen; p = e - rslen;
pp = rb_enc_left_char_head(s, p, e, enc); if (!at_char_boundary(s, p, e, enc)) continue;
if (pp != p) continue;
if (!rspara) rscheck(rsptr, rslen, rs); if (!rspara) rscheck(rsptr, rslen, rs);
if (memcmp(p, rsptr, rslen) == 0) { if (memcmp(p, rsptr, rslen) == 0) {
if (chomp) { if (chomp) {

View File

@ -3930,8 +3930,7 @@ str_ensure_byte_pos(VALUE str, long pos)
const char *s = RSTRING_PTR(str); const char *s = RSTRING_PTR(str);
const char *e = RSTRING_END(str); const char *e = RSTRING_END(str);
const char *p = s + pos; const char *p = s + pos;
const char *pp = rb_enc_left_char_head(s, p, e, rb_enc_get(str)); if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
if (p != pp) {
rb_raise(rb_eIndexError, rb_raise(rb_eIndexError,
"offset %ld does not land on character boundary", pos); "offset %ld does not land on character boundary", pos);
} }
@ -9521,7 +9520,7 @@ chompped_length(VALUE str, VALUE rs)
if (p[len-1] == newline && if (p[len-1] == newline &&
(rslen <= 1 || (rslen <= 1 ||
memcmp(rsptr, pp, rslen) == 0)) { memcmp(rsptr, pp, rslen) == 0)) {
if (rb_enc_left_char_head(p, pp, e, enc) == pp) if (at_char_boundary(p, pp, e, enc))
return len - rslen; return len - rslen;
RB_GC_GUARD(rs); RB_GC_GUARD(rs);
} }
@ -10497,7 +10496,7 @@ rb_str_end_with(int argc, VALUE *argv, VALUE str)
p = RSTRING_PTR(str); p = RSTRING_PTR(str);
e = p + slen; e = p + slen;
s = e - tlen; s = e - tlen;
if (rb_enc_left_char_head(p, s, e, enc) != s) if (!at_char_boundary(p, s, e, enc))
continue; continue;
if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0) if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
return Qtrue; return Qtrue;
@ -10605,7 +10604,7 @@ deleted_suffix_length(VALUE str, VALUE suffix)
suffixptr = RSTRING_PTR(suffix); suffixptr = RSTRING_PTR(suffix);
s = strptr + olen - suffixlen; s = strptr + olen - suffixlen;
if (memcmp(s, suffixptr, suffixlen) != 0) return 0; if (memcmp(s, suffixptr, suffixlen) != 0) return 0;
if (rb_enc_left_char_head(strptr, s, strptr + olen, enc) != s) return 0; if (!at_char_boundary(strptr, s, strptr + olen, enc)) return 0;
return suffixlen; return suffixlen;
} }