string.c: fix for UTF-32
* string.c (rb_str_scrub): fix for UTF-32. strlen() returns the wrong result on strings that contain NUL bytes, so use the sizeof operator instead. [ruby-dev:45975] [Feature #6752] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40417 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
eae1366b38
commit
596ca948b1
@ -1,3 +1,9 @@
|
|||||||
|
Tue Apr 23 11:58:46 2013 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (rb_str_scrub): fix for UTF-32. strlen() on strings
|
||||||
|
containing NUL returns the wrong result; use the sizeof operator instead.
|
||||||
|
[ruby-dev:45975] [Feature #6752]
|
||||||
|
|
||||||
Tue Apr 23 10:26:50 2013 Akinori MUSHA <knu@iDaemons.org>
|
Tue Apr 23 10:26:50 2013 Akinori MUSHA <knu@iDaemons.org>
|
||||||
|
|
||||||
* test/ruby/test_module.rb
|
* test/ruby/test_module.rb
|
||||||
|
26
string.c
26
string.c
@ -7805,6 +7805,11 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str)
|
|||||||
return rb_str_dup(str);
|
return rb_str_dup(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define DEFAULT_REPLACE_CHAR(str) do { \
|
||||||
|
static const char replace[sizeof(str)-1] = str; \
|
||||||
|
rep = replace; replen = (int)sizeof(replace); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
if (rb_enc_asciicompat(enc)) {
|
if (rb_enc_asciicompat(enc)) {
|
||||||
const char *p = RSTRING_PTR(str);
|
const char *p = RSTRING_PTR(str);
|
||||||
const char *e = RSTRING_END(str);
|
const char *e = RSTRING_END(str);
|
||||||
@ -7824,13 +7829,11 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str)
|
|||||||
rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
|
rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
|
||||||
}
|
}
|
||||||
else if (enc == rb_utf8_encoding()) {
|
else if (enc == rb_utf8_encoding()) {
|
||||||
rep = "\xEF\xBF\xBD";
|
DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
|
||||||
replen = strlen(rep);
|
|
||||||
rep7bit_p = FALSE;
|
rep7bit_p = FALSE;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
rep = "?";
|
DEFAULT_REPLACE_CHAR("?");
|
||||||
replen = strlen(rep);
|
|
||||||
rep7bit_p = TRUE;
|
rep7bit_p = TRUE;
|
||||||
}
|
}
|
||||||
cr = ENC_CODERANGE_7BIT;
|
cr = ENC_CODERANGE_7BIT;
|
||||||
@ -7938,24 +7941,19 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str)
|
|||||||
replen = RSTRING_LEN(repl);
|
replen = RSTRING_LEN(repl);
|
||||||
}
|
}
|
||||||
else if (enc == utf16be) {
|
else if (enc == utf16be) {
|
||||||
rep = "\xFF\xFD";
|
DEFAULT_REPLACE_CHAR("\xFF\xFD");
|
||||||
replen = strlen(rep);
|
|
||||||
}
|
}
|
||||||
else if (enc == utf16le) {
|
else if (enc == utf16le) {
|
||||||
rep = "\xFD\xFF";
|
DEFAULT_REPLACE_CHAR("\xFD\xFF");
|
||||||
replen = strlen(rep);
|
|
||||||
}
|
}
|
||||||
else if (enc == utf32be) {
|
else if (enc == utf32be) {
|
||||||
rep = "\x00\x00\xFF\xFD";
|
DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
|
||||||
replen = strlen(rep);
|
|
||||||
}
|
}
|
||||||
else if (enc == utf32le) {
|
else if (enc == utf32le) {
|
||||||
rep = "\xFD\xFF\x00\x00";
|
DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
|
||||||
replen = strlen(rep);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
rep = "?";
|
DEFAULT_REPLACE_CHAR("?");
|
||||||
replen = strlen(rep);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while (p < e) {
|
while (p < e) {
|
||||||
|
@ -1522,5 +1522,11 @@ class TestM17N < Test::Unit::TestCase
|
|||||||
assert_equal("\uFFFD\u3042".encode("UTF-16LE"),
|
assert_equal("\uFFFD\u3042".encode("UTF-16LE"),
|
||||||
"\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE).
|
"\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE).
|
||||||
scrub)
|
scrub)
|
||||||
|
assert_equal("\uFFFD".encode("UTF-32BE"),
|
||||||
|
"\xff".force_encoding(Encoding::UTF_32BE).
|
||||||
|
scrub)
|
||||||
|
assert_equal("\uFFFD".encode("UTF-32LE"),
|
||||||
|
"\xff".force_encoding(Encoding::UTF_32LE).
|
||||||
|
scrub)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Loading…
x
Reference in New Issue
Block a user