[Bug #19784] Fix behaviors against prefix with broken encoding
- String#start_with?
- String#delete_prefix
- String#delete_prefix!
This commit is contained in:
parent
808b067088
commit
b054c2fe06
Notes:
git
2023-08-25 23:58:22 +00:00
@ -7,12 +7,14 @@ describe "String#start_with?" do
|
|||||||
it_behaves_like :start_with, :to_s
|
it_behaves_like :start_with, :to_s
|
||||||
|
|
||||||
# Here and not in the shared examples because this is invalid as a Symbol
|
# Here and not in the shared examples because this is invalid as a Symbol
|
||||||
it "does not check that we are not starting to match at the head of a character" do
|
it "matches part of a character with the same part" do
|
||||||
"\xA9".should.start_with?("\xA9") # A9 is not a character head for UTF-8
|
"\xA9".should.start_with?("\xA9") # A9 is not a character head for UTF-8
|
||||||
end
|
end
|
||||||
|
|
||||||
it "does not check we are matching only part of a character" do
|
ruby_bug "#19784", ""..."3.3" do
|
||||||
"\xe3\x81\x82".size.should == 1
|
it "checks we are matching only part of a character" do
|
||||||
"\xe3\x81\x82".should.start_with?("\xe3")
|
"\xe3\x81\x82".size.should == 1
|
||||||
|
"\xe3\x81\x82".should_not.start_with?("\xe3")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -70,7 +70,9 @@ describe :start_with, shared: true do
|
|||||||
$1.should be_nil
|
$1.should be_nil
|
||||||
end
|
end
|
||||||
|
|
||||||
it "does not check that we are not matching part of a character" do
|
ruby_bug "#19784", ""..."3.3" do
|
||||||
"\xC3\xA9".send(@method).should.start_with?("\xC3")
|
it "checks that we are not matching part of a character" do
|
||||||
|
"\xC3\xA9".send(@method).should_not.start_with?("\xC3")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
56
string.c
56
string.c
@ -10461,10 +10461,20 @@ rb_str_start_with(int argc, VALUE *argv, VALUE str)
|
|||||||
return Qtrue;
|
return Qtrue;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
const char *p, *s, *e;
|
||||||
|
long slen, tlen;
|
||||||
|
rb_encoding *enc;
|
||||||
|
|
||||||
StringValue(tmp);
|
StringValue(tmp);
|
||||||
rb_enc_check(str, tmp);
|
enc = rb_enc_check(str, tmp);
|
||||||
if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
|
if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
|
||||||
if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
|
if ((slen = RSTRING_LEN(str)) < tlen) continue;
|
||||||
|
p = RSTRING_PTR(str);
|
||||||
|
e = p + slen;
|
||||||
|
s = p + tlen;
|
||||||
|
if (!at_char_boundary(p, s, e, enc))
|
||||||
|
continue;
|
||||||
|
if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
|
||||||
return Qtrue;
|
return Qtrue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -10483,12 +10493,13 @@ static VALUE
|
|||||||
rb_str_end_with(int argc, VALUE *argv, VALUE str)
|
rb_str_end_with(int argc, VALUE *argv, VALUE str)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
char *p, *s, *e;
|
|
||||||
rb_encoding *enc;
|
|
||||||
|
|
||||||
for (i=0; i<argc; i++) {
|
for (i=0; i<argc; i++) {
|
||||||
VALUE tmp = argv[i];
|
VALUE tmp = argv[i];
|
||||||
|
const char *p, *s, *e;
|
||||||
long slen, tlen;
|
long slen, tlen;
|
||||||
|
rb_encoding *enc;
|
||||||
|
|
||||||
StringValue(tmp);
|
StringValue(tmp);
|
||||||
enc = rb_enc_check(str, tmp);
|
enc = rb_enc_check(str, tmp);
|
||||||
if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
|
if ((tlen = RSTRING_LEN(tmp)) == 0) return Qtrue;
|
||||||
@ -10498,7 +10509,7 @@ rb_str_end_with(int argc, VALUE *argv, VALUE str)
|
|||||||
s = e - tlen;
|
s = e - tlen;
|
||||||
if (!at_char_boundary(p, s, e, enc))
|
if (!at_char_boundary(p, s, e, enc))
|
||||||
continue;
|
continue;
|
||||||
if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
|
if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
|
||||||
return Qtrue;
|
return Qtrue;
|
||||||
}
|
}
|
||||||
return Qfalse;
|
return Qfalse;
|
||||||
@ -10516,12 +10527,17 @@ rb_str_end_with(int argc, VALUE *argv, VALUE str)
|
|||||||
static long
|
static long
|
||||||
deleted_prefix_length(VALUE str, VALUE prefix)
|
deleted_prefix_length(VALUE str, VALUE prefix)
|
||||||
{
|
{
|
||||||
char *strptr, *prefixptr;
|
const char *strptr, *prefixptr;
|
||||||
long olen, prefixlen;
|
long olen, prefixlen;
|
||||||
|
rb_encoding *enc = rb_enc_get(str);
|
||||||
|
|
||||||
StringValue(prefix);
|
StringValue(prefix);
|
||||||
if (is_broken_string(prefix)) return 0;
|
|
||||||
rb_enc_check(str, prefix);
|
if (!is_broken_string(prefix) ||
|
||||||
|
!rb_enc_asciicompat(enc) ||
|
||||||
|
!rb_enc_asciicompat(rb_enc_get(prefix))) {
|
||||||
|
enc = rb_enc_check(str, prefix);
|
||||||
|
}
|
||||||
|
|
||||||
/* return 0 if not start with prefix */
|
/* return 0 if not start with prefix */
|
||||||
prefixlen = RSTRING_LEN(prefix);
|
prefixlen = RSTRING_LEN(prefix);
|
||||||
@ -10531,6 +10547,19 @@ deleted_prefix_length(VALUE str, VALUE prefix)
|
|||||||
strptr = RSTRING_PTR(str);
|
strptr = RSTRING_PTR(str);
|
||||||
prefixptr = RSTRING_PTR(prefix);
|
prefixptr = RSTRING_PTR(prefix);
|
||||||
if (memcmp(strptr, prefixptr, prefixlen) != 0) return 0;
|
if (memcmp(strptr, prefixptr, prefixlen) != 0) return 0;
|
||||||
|
if (is_broken_string(prefix)) {
|
||||||
|
if (!is_broken_string(str)) {
|
||||||
|
/* prefix in a valid string cannot be broken */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
const char *strend = strptr + olen;
|
||||||
|
const char *after_prefix = strptr + prefixlen;
|
||||||
|
if (!at_char_boundary(strptr, after_prefix, strend, enc)) {
|
||||||
|
/* prefix does not end at char-boundary */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* prefix part in `str` also should be valid. */
|
||||||
|
|
||||||
return prefixlen;
|
return prefixlen;
|
||||||
}
|
}
|
||||||
@ -10587,7 +10616,7 @@ rb_str_delete_prefix(VALUE str, VALUE prefix)
|
|||||||
static long
|
static long
|
||||||
deleted_suffix_length(VALUE str, VALUE suffix)
|
deleted_suffix_length(VALUE str, VALUE suffix)
|
||||||
{
|
{
|
||||||
char *strptr, *suffixptr, *s;
|
const char *strptr, *suffixptr;
|
||||||
long olen, suffixlen;
|
long olen, suffixlen;
|
||||||
rb_encoding *enc;
|
rb_encoding *enc;
|
||||||
|
|
||||||
@ -10602,9 +10631,10 @@ deleted_suffix_length(VALUE str, VALUE suffix)
|
|||||||
if (olen < suffixlen) return 0;
|
if (olen < suffixlen) return 0;
|
||||||
strptr = RSTRING_PTR(str);
|
strptr = RSTRING_PTR(str);
|
||||||
suffixptr = RSTRING_PTR(suffix);
|
suffixptr = RSTRING_PTR(suffix);
|
||||||
s = strptr + olen - suffixlen;
|
const char *strend = strptr + olen;
|
||||||
if (memcmp(s, suffixptr, suffixlen) != 0) return 0;
|
const char *before_suffix = strend - suffixlen;
|
||||||
if (!at_char_boundary(strptr, s, strptr + olen, enc)) return 0;
|
if (memcmp(before_suffix, suffixptr, suffixlen) != 0) return 0;
|
||||||
|
if (!at_char_boundary(strptr, before_suffix, strend, enc)) return 0;
|
||||||
|
|
||||||
return suffixlen;
|
return suffixlen;
|
||||||
}
|
}
|
||||||
|
@ -1938,6 +1938,8 @@ CODE
|
|||||||
assert_send([S("hello"), :start_with?, S("hel")])
|
assert_send([S("hello"), :start_with?, S("hel")])
|
||||||
assert_not_send([S("hello"), :start_with?, S("el")])
|
assert_not_send([S("hello"), :start_with?, S("el")])
|
||||||
assert_send([S("hello"), :start_with?, S("el"), S("he")])
|
assert_send([S("hello"), :start_with?, S("el"), S("he")])
|
||||||
|
assert_send([S("\xFF\xFE"), :start_with?, S("\xFF")])
|
||||||
|
assert_not_send([S("\u{c4}"), :start_with?, S("\xC3")])
|
||||||
|
|
||||||
bug5536 = '[ruby-core:40623]'
|
bug5536 = '[ruby-core:40623]'
|
||||||
assert_raise(TypeError, bug5536) {S("str").start_with? :not_convertible_to_string}
|
assert_raise(TypeError, bug5536) {S("str").start_with? :not_convertible_to_string}
|
||||||
@ -2930,6 +2932,7 @@ CODE
|
|||||||
assert_equal("\x95\x5c".force_encoding("Shift_JIS"), s.delete_prefix("\x95"))
|
assert_equal("\x95\x5c".force_encoding("Shift_JIS"), s.delete_prefix("\x95"))
|
||||||
assert_equal("\x95\x5c".force_encoding("Shift_JIS"), s)
|
assert_equal("\x95\x5c".force_encoding("Shift_JIS"), s)
|
||||||
|
|
||||||
|
assert_equal("\xFE", S("\xFF\xFE").delete_prefix("\xFF"))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_delete_prefix_clear_coderange
|
def test_delete_prefix_clear_coderange
|
||||||
@ -2978,6 +2981,9 @@ CODE
|
|||||||
assert_equal(nil, s.delete_prefix!("\xe3"))
|
assert_equal(nil, s.delete_prefix!("\xe3"))
|
||||||
assert_equal("\xe3\x81\x82", s)
|
assert_equal("\xe3\x81\x82", s)
|
||||||
|
|
||||||
|
s = S("\xFF\xFE")
|
||||||
|
assert_equal("\xFE", s.delete_prefix!("\xFF"))
|
||||||
|
assert_equal("\xFE", s)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_delete_prefix_bang_clear_coderange
|
def test_delete_prefix_bang_clear_coderange
|
||||||
|
Loading…
x
Reference in New Issue
Block a user