* string.c (rb_str_inspect): dump as \uXXXX when the string is in Unicode. [ruby-dev:39388]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25143 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2009-09-29 00:34:06 +00:00
parent f7554133b2
commit 3058eec581
3 changed files with 59 additions and 33 deletions

View File

@ -1,3 +1,8 @@
Tue Sep 29 06:50:32 2009 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (rb_str_inspect): dump as \uXXXX when the
string is in Unicode. [ruby-dev:39388]
Tue Sep 29 06:49:16 2009 NARUSE, Yui <naruse@ruby-lang.org>
* encoding.c (rb_enc_unicode_p): defined.

View File

@ -4061,6 +4061,7 @@ rb_str_inspect(VALUE str)
char *p, *pend;
VALUE result = rb_str_buf_new(0);
rb_encoding *resenc = rb_default_internal_encoding();
int unicode_p = rb_enc_unicode_p(enc);
if (resenc == NULL) resenc = rb_default_external_encoding();
if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
@ -4069,7 +4070,7 @@ rb_str_inspect(VALUE str)
p = RSTRING_PTR(str); pend = RSTRING_END(str);
while (p < pend) {
unsigned int c, cc;
unsigned int c = -1, cc;
int n;
n = rb_enc_precise_mbclen(p, pend, enc);
@ -4114,18 +4115,29 @@ rb_str_inspect(VALUE str)
else if (c == 033) {
str_buf_cat2(result, "\\e");
}
else if ((enc == resenc && rb_enc_isprint(c, enc)) || rb_enc_isascii(c, enc)) {
else if ((enc == resenc && rb_enc_isprint(c, enc)) ||
(rb_enc_isascii(c, enc) && ISPRINT(c))) {
str_buf_cat(result, p-n, n);
}
else {
char *q;
char buf[11];
escape_codepoint:
for (q = p-n; q < p; q++) {
#define BACKESC_BUFSIZE 5
char buf[BACKESC_BUFSIZE];
sprintf(buf, "\\x%02X", *q & 0377);
str_buf_cat(result, buf, BACKESC_BUFSIZE - 1);
#undef BACKESC_BUFSIZE
if (unicode_p && c != -1) {
if (c > 0xFFFF) {
sprintf(buf, "\\u{%X}", c);
}
else {
sprintf(buf, "\\u%04X", c);
}
str_buf_cat(result, buf, strlen(buf));
}
else {
char *q;
for (q = p-n; q < p; q++) {
sprintf(buf, "\\x%02X", *q & 0377);
str_buf_cat(result, buf, strlen(buf));
}
}
}
}

View File

@ -2,6 +2,15 @@ require 'test/unit'
require 'stringio'
class TestM17N < Test::Unit::TestCase
# Encoding the assertions in this file expect for inspected strings:
# Encoding.default_internal when one is set, otherwise
# Encoding.default_external.
def inspect_encoding
  internal = Encoding.default_internal
  return internal if internal

  Encoding.default_external
end
# Test fixture hook: puts the process-wide default encodings into a known
# state (no internal encoding, UTF-8 external). With these values
# inspect_encoding evaluates to Encoding::UTF_8.
def setup
Encoding.default_internal = nil
Encoding.default_external = Encoding::UTF_8
end
# Asserts that +actual+ is the Encoding object registered under the name
# +encname+. +message+ is passed through to assert_equal unchanged.
def assert_encoding(encname, actual, message=nil)
  # Look the name up first so the expected value is an Encoding, not a String.
  expected = Encoding.find(encname)
  assert_equal(expected, actual, message)
end
@ -201,10 +210,10 @@ class TestM17N < Test::Unit::TestCase
assert_equal('"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ").inspect)
assert_equal(e("\"\\xA1\x8f\xA1\xA1\""), e("\xa1\x8f\xa1\xa1").inspect)
assert_equal("\"\\xA1\\x8F\\xA1\\xA1\"", e("\xa1\x8f\xa1\xa1").inspect)
assert_equal('"\x81."', s("\x81.").inspect)
assert_equal(s("\"\x81@\""), s("\x81@").inspect)
assert_equal(s('"\x81\x40"'), s("\x81@").inspect)
assert_equal('"\xFC"', u("\xfc").inspect)
end
@ -756,30 +765,30 @@ class TestM17N < Test::Unit::TestCase
end
# Exercises the `%p` format directive on string arguments and checks the
# result through assert_strenc (defined outside this view; presumably it
# compares both the text and the encoding of the result -- TODO confirm).
# The a/e/s/u helpers are also defined elsewhere; judging by the encoding
# names paired with them below they presumably tag a literal as ASCII-8BIT,
# EUC-JP, Windows-31J and UTF-8 respectively -- verify against their
# definitions.
#
# NOTE(review): each group of assertions appears twice, once expecting a
# fixed encoding name and once expecting inspect_encoding, for the same
# expression. These can contradict each other, so this looks like the
# pre-change and post-change lines of a diff kept side by side -- confirm
# which set is current before relying on this method.
def test_sprintf_p
# Empty string: only the surrounding quotes are produced.
assert_strenc('""', 'ASCII-8BIT', a("%p") % a(""))
assert_strenc('""', 'EUC-JP', e("%p") % e(""))
assert_strenc('""', 'Windows-31J', s("%p") % s(""))
assert_strenc('""', 'UTF-8', u("%p") % u(""))
assert_strenc('""', inspect_encoding, a("%p") % a(""))
assert_strenc('""', inspect_encoding, e("%p") % e(""))
assert_strenc('""', inspect_encoding, s("%p") % s(""))
assert_strenc('""', inspect_encoding, u("%p") % u(""))
# Single printable ASCII character.
assert_strenc('"a"', 'ASCII-8BIT', a("%p") % a("a"))
assert_strenc('"a"', 'EUC-JP', e("%p") % e("a"))
assert_strenc('"a"', 'Windows-31J', s("%p") % s("a"))
assert_strenc('"a"', 'UTF-8', u("%p") % u("a"))
assert_strenc('"a"', inspect_encoding, a("%p") % a("a"))
assert_strenc('"a"', inspect_encoding, e("%p") % e("a"))
assert_strenc('"a"', inspect_encoding, s("%p") % s("a"))
assert_strenc('"a"', inspect_encoding, u("%p") % u("a"))
# Bytes 0xC2 0xA1. Single-quoted expectations contain the literal text
# \xC2\xA1 (escaped output); double-quoted ones contain the two raw bytes
# (output passed through unescaped).
assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', a("%p") % a("\xc2\xa1"))
assert_strenc("\"\xC2\xA1\"", 'EUC-JP', e("%p") % e("\xc2\xa1"))
#assert_strenc("\"\xC2\xA1\"", 'Windows-31J', s("%p") % s("\xc2\xa1"))
assert_strenc("\"\xC2\xA1\"", 'UTF-8', u("%p") % u("\xc2\xa1"))
assert_strenc('"\xC2\xA1"', inspect_encoding, a("%p") % a("\xc2\xa1"))
assert_strenc('"\xC2\xA1"', inspect_encoding, e("%p") % e("\xc2\xa1"))
#assert_strenc("\"\xC2\xA1\"", inspect_encoding, s("%p") % s("\xc2\xa1"))
assert_strenc("\"\xC2\xA1\"", inspect_encoding, u("%p") % u("\xc2\xa1"))
# Same bytes with a minimum field width of 10 ("%10p"); the format string
# here is a plain literal rather than one wrapped by a/e/s/u.
assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', "%10p" % a("\xc2\xa1"))
assert_strenc(" \"\xC2\xA1\"", 'EUC-JP', "%10p" % e("\xc2\xa1"))
#assert_strenc(" \"\xC2\xA1\"", 'Windows-31J', "%10p" % s("\xc2\xa1"))
assert_strenc(" \"\xC2\xA1\"", 'UTF-8', "%10p" % u("\xc2\xa1"))
assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % a("\xc2\xa1"))
assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % e("\xc2\xa1"))
#assert_strenc(" \"\xC2\xA1\"", inspect_encoding, "%10p" % s("\xc2\xa1"))
assert_strenc(" \"\xC2\xA1\"", inspect_encoding, "%10p" % u("\xc2\xa1"))
# NUL byte: expected as the literal text \x00, except in the final u(...)
# assertion, which expects the literal text \u0000 instead.
assert_strenc('"\x00"', 'ASCII-8BIT', a("%p") % a("\x00"))
assert_strenc('"\x00"', 'EUC-JP', e("%p") % e("\x00"))
assert_strenc('"\x00"', 'Windows-31J', s("%p") % s("\x00"))
assert_strenc('"\x00"', 'UTF-8', u("%p") % u("\x00"))
assert_strenc('"\x00"', inspect_encoding, a("%p") % a("\x00"))
assert_strenc('"\x00"', inspect_encoding, e("%p") % e("\x00"))
assert_strenc('"\x00"', inspect_encoding, s("%p") % s("\x00"))
assert_strenc('"\u0000"', inspect_encoding, u("%p") % u("\x00"))
end
def test_sprintf_s
@ -1176,8 +1185,8 @@ class TestM17N < Test::Unit::TestCase
assert_equal(Encoding::US_ASCII, [].to_s.encoding)
assert_equal(Encoding::US_ASCII, [nil].to_s.encoding)
assert_equal(Encoding::US_ASCII, [1].to_s.encoding)
assert_equal(Encoding::US_ASCII, [""].to_s.encoding)
assert_equal(Encoding::US_ASCII, ["a"].to_s.encoding)
assert_equal(inspect_encoding, [""].to_s.encoding)
assert_equal(inspect_encoding, ["a"].to_s.encoding)
assert_equal(Encoding::US_ASCII, [nil,1,"","a","\x20",[]].to_s.encoding)
end