[Bug #19902] Update the coderange regarding the changed region

This commit is contained in:
Nobuyoshi Nakada 2023-09-26 13:25:44 +09:00
parent f0d8278607
commit 6b66b5fded
3 changed files with 66 additions and 0 deletions

View File

@ -7,8 +7,18 @@ bug_str_set_len(VALUE str, VALUE len)
return str;
}
/*
 * Test helper: writes the bytes of +addendum+ into +str+'s buffer starting
 * at RSTRING_END(str), after requesting that many extra bytes through
 * rb_str_modify_expand().
 *
 * Nothing here adjusts the length of +str+ or touches its coderange; the
 * copied bytes sit beyond the current end until the length is changed by
 * some other call.
 *
 * Returns +str+.
 */
static VALUE
bug_str_append(VALUE str, VALUE addendum)
{
    long addendum_len;

    StringValue(addendum);
    addendum_len = RSTRING_LEN(addendum);

    /* Request room first; the end pointer is read only afterwards. */
    rb_str_modify_expand(str, addendum_len);
    memcpy(RSTRING_END(str), RSTRING_PTR(addendum), addendum_len);

    return str;
}
/*
 * Defines the helper methods of this file on +klass+:
 *   "set_len" -> bug_str_set_len (takes 1 argument)
 *   "append"  -> bug_str_append  (takes 1 argument)
 *
 * NOTE(review): presumably called from the extension's initialization
 * code with the class under test; the caller is not visible here.
 */
void
Init_string_set_len(VALUE klass)
{
/* Sets the length of the receiver directly. */
rb_define_method(klass, "set_len", bug_str_set_len, 1);
/* Writes bytes past the current end without changing the length. */
rb_define_method(klass, "append", bug_str_append, 1);
}

View File

@ -2985,6 +2985,33 @@ rb_str_set_len(VALUE str, long len)
if (len > (capa = (long)str_capacity(str, termlen)) || len < 0) {
rb_bug("probable buffer overflow: %ld for %ld", len, capa);
}
int cr = ENC_CODERANGE(str);
if (cr == ENC_CODERANGE_UNKNOWN) {
/* Leave unknown. */
}
else if (len > RSTRING_LEN(str)) {
if (ENC_CODERANGE_CLEAN_P(cr)) {
/* Update the coderange regarding the extended part. */
const char *const prev_end = RSTRING_END(str);
const char *const new_end = RSTRING_PTR(str) + len;
rb_encoding *enc = rb_enc_get(str);
rb_str_coderange_scan_restartable(prev_end, new_end, enc, &cr);
ENC_CODERANGE_SET(str, cr);
}
else if (cr == ENC_CODERANGE_BROKEN) {
/* The appended part may have made the string valid. */
ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
}
}
else if (len < RSTRING_LEN(str)) {
if (cr != ENC_CODERANGE_7BIT) {
/* An ASCII-only string stays ASCII-only after truncation.
 * A valid string may become broken and a broken string may
 * become valid, so leave the coderange unknown. */
ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
}
}
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING_PTR(str)[len], termlen);
}

View File

@ -34,4 +34,33 @@ class Test_StrSetLen < Test::Unit::TestCase
assert_equal 128, Bug::String.capacity(str)
assert_equal 127, str.set_len(127).bytesize, bug12757
end
# [Bug #19902] Bytes written beyond the end of the string must be
# reflected in the encoding predicates once set_len makes them part
# of the string.
def test_coderange_after_append
  hiragana_a = -"\u3042"
  nbytes = hiragana_a.bytesize
  buf = Bug::String.new(encoding: Encoding::UTF_8)

  # append only stores the bytes; the length is still zero.
  buf.append(hiragana_a)
  assert_equal(0, buf.bytesize)

  # Extending the length exposes the multibyte character.
  buf.set_len(nbytes)
  assert_equal(nbytes, buf.bytesize)
  assert_predicate(buf, :valid_encoding?)
  assert_not_predicate(buf, :ascii_only?)
  assert_equal(hiragana_a, buf)
end
# [Bug #19902] Cutting a multibyte character short makes the string
# invalid; putting the missing byte back and restoring the length
# must make it valid again.
def test_coderange_after_trunc
  hiragana_a = -"\u3042"
  nbytes = hiragana_a.bytesize
  truncated_len = nbytes - 1
  buf = Bug::String.new(hiragana_a)

  # Drop the last byte of the only character.
  buf.set_len(truncated_len)
  assert_equal(truncated_len, buf.bytesize)
  assert_not_predicate(buf, :valid_encoding?)
  assert_not_predicate(buf, :ascii_only?)

  # Write the dropped byte back and extend over it.
  buf.append(hiragana_a.byteslice(-1))
  buf.set_len(nbytes)
  assert_equal(nbytes, buf.bytesize)
  assert_predicate(buf, :valid_encoding?)
  assert_not_predicate(buf, :ascii_only?)
  assert_equal(hiragana_a, buf)
end
end