* enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.
* enc/trans/utf_16_32.trans: add a converter from UTF-16 to UTF-8. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29889 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
92b190fa24
commit
78bee9c26a
@ -1,3 +1,9 @@
|
|||||||
|
Wed Nov 24 01:40:23 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.
|
||||||
|
|
||||||
|
* enc/trans/utf_16_32.trans: add a converter from UTF-16 to UTF-8.
|
||||||
|
|
||||||
Tue Nov 23 21:59:47 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Tue Nov 23 21:59:47 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* win32/win32.c (wlink, rb_w32_getppid): use typedef instead of
|
* win32/win32.c (wlink, rb_w32_getppid): use typedef instead of
|
||||||
|
@ -21,6 +21,10 @@
|
|||||||
map["00{01-10}{00-ff}{00-ff}"] = :func_so
|
map["00{01-10}{00-ff}{00-ff}"] = :func_so
|
||||||
transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE")
|
transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE")
|
||||||
|
|
||||||
|
# Action map for BOM-prefixed UTF-16: any two-byte unit maps to :func_si.
map = { "{00-ff}{00-ff}" => :func_si }
transcode_generate_node(ActionMap.parse(map), "from_UTF_16")
|
||||||
|
|
||||||
map = {}
|
map = {}
|
||||||
map["{00-7f}"] = :func_so
|
map["{00-7f}"] = :func_so
|
||||||
map["{c2-df}{80-bf}"] = :func_so
|
map["{c2-df}{80-bf}"] = :func_so
|
||||||
@ -259,6 +263,64 @@ fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char
|
|||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Reset the converter state byte pointed to by statep to 0.
 * Always returns 0.
 */
static int
state_init(void *statep)
{
    *(unsigned char *)statep = 0;
    return 0;
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
|
||||||
|
{
|
||||||
|
#define BE 1
|
||||||
|
#define LE 2
|
||||||
|
unsigned char *sp = statep;
|
||||||
|
switch (*sp) {
|
||||||
|
case 0:
|
||||||
|
if (s[0] == 0xFE && s[1] == 0xFF) {
|
||||||
|
*sp = BE;
|
||||||
|
return ZERObt;
|
||||||
|
}
|
||||||
|
else if (s[0] == 0xFF && s[1] == 0xFE) {
|
||||||
|
*sp = LE;
|
||||||
|
return ZERObt;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case BE:
|
||||||
|
if (0xD8 <= s[0] && s[0] <= 0xDB) {
|
||||||
|
return (VALUE)from_UTF_16BE_D8toDB_00toFF;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return (VALUE)FUNso;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case LE:
|
||||||
|
if (0xD8 <= s[1] && s[1] <= 0xDB) {
|
||||||
|
return (VALUE)from_UTF_16LE_00toFF_D8toDB;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return (VALUE)FUNso;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return (VALUE)INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t
|
||||||
|
fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
|
||||||
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
|
switch (*sp) {
|
||||||
|
case BE:
|
||||||
|
return fun_so_from_utf_16be(statep, s, l, o, osize);
|
||||||
|
case LE:
|
||||||
|
return fun_so_from_utf_16le(statep, s, l, o, osize);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_from_UTF_16BE = {
|
rb_from_UTF_16BE = {
|
||||||
"UTF-16BE", "UTF-8", from_UTF_16BE,
|
"UTF-16BE", "UTF-8", from_UTF_16BE,
|
||||||
@ -355,6 +417,18 @@ rb_to_UTF_32LE = {
|
|||||||
NULL, NULL, NULL, fun_so_to_utf_32le
|
NULL, NULL, NULL, fun_so_to_utf_32le
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const rb_transcoder
|
||||||
|
rb_from_UTF_16 = {
|
||||||
|
"UTF-16", "UTF-8", from_UTF_16,
|
||||||
|
TRANSCODE_TABLE_INFO,
|
||||||
|
2, /* input_unit_length */
|
||||||
|
4, /* max_input */
|
||||||
|
4, /* max_output */
|
||||||
|
asciicompat_decoder, /* asciicompat_type */
|
||||||
|
1, state_init, NULL, /* state_size, state_init, state_fini */
|
||||||
|
NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
|
||||||
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
Init_utf_16_32(void)
|
Init_utf_16_32(void)
|
||||||
{
|
{
|
||||||
@ -366,4 +440,5 @@ Init_utf_16_32(void)
|
|||||||
rb_register_transcoder(&rb_to_UTF_32BE);
|
rb_register_transcoder(&rb_to_UTF_32BE);
|
||||||
rb_register_transcoder(&rb_from_UTF_32LE);
|
rb_register_transcoder(&rb_from_UTF_32LE);
|
||||||
rb_register_transcoder(&rb_to_UTF_32LE);
|
rb_register_transcoder(&rb_to_UTF_32LE);
|
||||||
|
rb_register_transcoder(&rb_from_UTF_16);
|
||||||
}
|
}
|
||||||
|
4
enc/utf_16_32.h
Normal file
4
enc/utf_16_32.h
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#include "regenc.h"
|
||||||
|
/* dummy for unsupported, stateful encoding */
|
||||||
|
ENC_DUMMY("UTF-16");
|
||||||
|
ENC_DUMMY("UTF-32");
|
@ -1019,6 +1019,12 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
check_utf_16_both_ways("\u{F00FF}", "\xDB\x80\xDC\xFF")
|
check_utf_16_both_ways("\u{F00FF}", "\xDB\x80\xDC\xFF")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Strings tagged "UTF-16" must be decoded using the byte order given by
# their leading byte order mark, for both FF FE and FE FF.
def test_utf_16_bom
  expected = "\u{3042}\u{3044}\u{20bb7}"
  # Starts with FF FE: the following units are little endian.
  little_endian = %w/fffe4230443042d8b7df/.pack("H*")
  # Starts with FE FF: the following units are big endian.
  big_endian = %w/feff30423044d842dfb7/.pack("H*")
  assert_equal(expected, little_endian.encode("UTF-8","UTF-16"))
  assert_equal(expected, big_endian.encode("UTF-8","UTF-16"))
end
|
||||||
|
|
||||||
def check_utf_32_both_ways(utf8, raw)
|
def check_utf_32_both_ways(utf8, raw)
|
||||||
copy = raw.dup
|
copy = raw.dup
|
||||||
0.step(copy.length-1, 4) do |i|
|
0.step(copy.length-1, 4) do |i|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user