* enc/trans/utf_16_32.trans: add a converter from UTF-8 to UTF-16.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
3ab82a65d7
commit
7f38397b6c
@ -1,3 +1,7 @@
|
|||||||
|
Wed Nov 24 05:40:33 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* enc/trans/utf_16_32.trans: add a converter from UTF-8 to UTF-16.
|
||||||
|
|
||||||
Wed Nov 24 03:21:35 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
Wed Nov 24 03:21:35 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
* enc/trans/utf_16_32.trans: raise error on unpaired upper
|
* enc/trans/utf_16_32.trans: raise error on unpaired upper
|
||||||
|
@ -289,7 +289,7 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case BE:
|
case BE:
|
||||||
if (s[0] < 0xD8 && 0xDF < s[0]) {
|
if (s[0] < 0xD8 || 0xDF < s[0]) {
|
||||||
return (VALUE)FUNso;
|
return (VALUE)FUNso;
|
||||||
}
|
}
|
||||||
else if (s[0] <= 0xDB) {
|
else if (s[0] <= 0xDB) {
|
||||||
@ -297,7 +297,7 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case LE:
|
case LE:
|
||||||
if (s[1] < 0xD8 && 0xDF < s[1]) {
|
if (s[1] < 0xD8 || 0xDF < s[1]) {
|
||||||
return (VALUE)FUNso;
|
return (VALUE)FUNso;
|
||||||
}
|
}
|
||||||
else if (s[1] <= 0xDB) {
|
else if (s[1] <= 0xDB) {
|
||||||
@ -321,6 +321,19 @@ fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t
|
||||||
|
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
|
||||||
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
|
if (*sp == 0) {
|
||||||
|
*o++ = 0xFE;
|
||||||
|
*o++ = 0xFF;
|
||||||
|
*sp = 1;
|
||||||
|
return 2 + fun_so_to_utf_16be(statep, s, l, o, osize);
|
||||||
|
}
|
||||||
|
return fun_so_to_utf_16be(statep, s, l, o, osize);
|
||||||
|
}
|
||||||
|
|
||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_from_UTF_16BE = {
|
rb_from_UTF_16BE = {
|
||||||
"UTF-16BE", "UTF-8", from_UTF_16BE,
|
"UTF-16BE", "UTF-8", from_UTF_16BE,
|
||||||
@ -429,6 +442,18 @@ rb_from_UTF_16 = {
|
|||||||
NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
|
NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const rb_transcoder
|
||||||
|
rb_to_UTF_16 = {
|
||||||
|
"UTF-8", "UTF-16", from_UTF_8,
|
||||||
|
TRANSCODE_TABLE_INFO,
|
||||||
|
1, /* input_unit_length */
|
||||||
|
4, /* max_input */
|
||||||
|
4, /* max_output */
|
||||||
|
asciicompat_encoder, /* asciicompat_type */
|
||||||
|
1, state_init, NULL, /* state_size, state_init, state_fini */
|
||||||
|
NULL, NULL, NULL, fun_so_to_utf_16
|
||||||
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
Init_utf_16_32(void)
|
Init_utf_16_32(void)
|
||||||
{
|
{
|
||||||
@ -441,4 +466,5 @@ Init_utf_16_32(void)
|
|||||||
rb_register_transcoder(&rb_from_UTF_32LE);
|
rb_register_transcoder(&rb_from_UTF_32LE);
|
||||||
rb_register_transcoder(&rb_to_UTF_32LE);
|
rb_register_transcoder(&rb_to_UTF_32LE);
|
||||||
rb_register_transcoder(&rb_from_UTF_16);
|
rb_register_transcoder(&rb_from_UTF_16);
|
||||||
|
rb_register_transcoder(&rb_to_UTF_16);
|
||||||
}
|
}
|
||||||
|
@ -1022,9 +1022,9 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
def test_utf_16_bom
|
def test_utf_16_bom
|
||||||
expected = "\u{3042}\u{3044}\u{20bb7}"
|
expected = "\u{3042}\u{3044}\u{20bb7}"
|
||||||
assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16"))
|
assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16"))
|
||||||
assert_equal(expected, %w/feff30423044d842dfb7/.pack("H*").encode("UTF-8","UTF-16"))
|
check_both_ways(expected, %w/feff30423044d842dfb7/.pack("H*"), "UTF-16")
|
||||||
assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16"))
|
assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16")}
|
||||||
assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16"))
|
assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16")}
|
||||||
end
|
end
|
||||||
|
|
||||||
def check_utf_32_both_ways(utf8, raw)
|
def check_utf_32_both_ways(utf8, raw)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user