* transcode_data.h (rb_transcoding): new field "stateful".
(rb_transcoder): preprocessor and postprocessor field removed. change arguments of func_ii, func_si, func_io and func_so. new field "finish_func".
* tool/transcode-tblgen.rb: make FUNii, FUNsi and FUNio generatable.
* transcode.c (transcoder_lib_table): removed. (transcoder_table): change structure. (transcoder_key): removed because of the above structure change. (make_transcoder_entry): new function. (get_transcoder_entry): ditto. (rb_register_transcoder): follow the structure change. (declare_transcoder): ditto. (transcode_search_path): new function for breadth first search to find a list of converters. (transcode_search_path_i): new function. (transcode_dispatch_cb): ditto. (transcode_dispatch): use transcode_search_path. (transcode_loop): follow the argument change. (str_transcode): preprocessor and postprocessor stuff removed.
* enc/trans/iso2022.erb.c: new file. ISO-2022-JP conversion re-implemented.
* enc/trans/japanese.erb.c: ISO-2022-JP stuff removed.
* enc/trans/utf_16_32.erb.c: follow argument change of FUNso.
[ruby-dev:35798]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18419 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
05373c4469
commit
1504652373
34
ChangeLog
34
ChangeLog
@ -1,3 +1,37 @@
|
|||||||
|
Thu Aug 7 23:43:11 2008 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* transcode_data.h (rb_transcoding): new field "stateful".
|
||||||
|
(rb_transcoder): preprocessor and postprocessor field removed.
|
||||||
|
change arguments of func_ii, func_si, func_io and func_so.
|
||||||
|
new field "finish_func".
|
||||||
|
|
||||||
|
* tool/transcode-tblgen.rb: make FUNii, FUNsi and FUNio
|
||||||
|
generatable.
|
||||||
|
|
||||||
|
* transcode.c (transcoder_lib_table): removed.
|
||||||
|
(transcoder_table): change structure.
|
||||||
|
(transcoder_key): removed because the above structure change.
|
||||||
|
(make_transcoder_entry): new function.
|
||||||
|
(get_transcoder_entry): ditto.
|
||||||
|
(rb_register_transcoder): follow the structure change.
|
||||||
|
(declare_transcoder): ditto.
|
||||||
|
(transcode_search_path): new function for breadth first search to
|
||||||
|
find a list of converters.
|
||||||
|
(transcode_search_path_i): new function.
|
||||||
|
(transcode_dispatch_cb): ditto.
|
||||||
|
(transcode_dispatch): use transcode_search_path.
|
||||||
|
(transcode_loop): follow the argument change.
|
||||||
|
(str_transcode): preprocessor and postprocessor stuff removed.
|
||||||
|
|
||||||
|
* enc/trans/iso2022.erb.c: new file. ISO-2022-JP conversion
|
||||||
|
re-implemented.
|
||||||
|
|
||||||
|
* enc/trans/japanese.erb.c: ISO-2022-JP stuff removed.
|
||||||
|
|
||||||
|
* enc/trans/utf_16_32.erb.c: follow argument change of FUNso.
|
||||||
|
|
||||||
|
[ruby-dev:35798]
|
||||||
|
|
||||||
Thu Aug 7 22:55:44 2008 TAKAO Kouji <kouji@takao7.net>
|
Thu Aug 7 22:55:44 2008 TAKAO Kouji <kouji@takao7.net>
|
||||||
|
|
||||||
* ext/readline/README.ja: updated API document for Readline module.
|
* ext/readline/README.ja: updated API document for Readline module.
|
||||||
|
142
enc/trans/iso2022.erb.c
Normal file
142
enc/trans/iso2022.erb.c
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
#include "transcode_data.h"
|
||||||
|
|
||||||
|
/*
 * Byte-pattern -> action maps for the ISO-2022-JP to EUC-JP direction.
 * "map" routes the four listed designation escape sequences to func_so
 * and every other 7-bit byte except 0x0e, 0x0f and 0x1b to func_si.
 * "map_jisx0208_rest" accepts a single byte in 0x21..0x7e and routes it
 * to func_so; fun_si_iso2022jp_to_eucjp hands out the node generated from
 * it (iso2022jp_to_eucjp_jisx0208_rest) while in two-byte state.
 */
<%
|
||||||
|
map = {}
|
||||||
|
map["1b2842"] = :func_so # designate US-ASCII to G0. "ESC ( B"
|
||||||
|
map["1b284a"] = :func_so # designate JIS X 0201 latin to G0. "ESC ( J"
|
||||||
|
map["1b2440"] = :func_so # designate JIS X 0208 1978 to G0. "ESC $ @"
|
||||||
|
map["1b2442"] = :func_so # designate JIS X 0208 1983 to G0. "ESC $ B"
|
||||||
|
map["{00-0d,10-1a,1c-7f}"] = :func_si
|
||||||
|
|
||||||
|
map_jisx0208_rest = {}
|
||||||
|
map_jisx0208_rest["{21-7e}"] = :func_so
|
||||||
|
%>
|
||||||
|
|
||||||
|
<%= transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp", []) %>
|
||||||
|
<%= transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest", []) %>
|
||||||
|
|
||||||
|
/*
 * func_si hook of the ISO-2022-JP -> EUC-JP transcoder: decides how a
 * plain (non-escape) input byte is handled, based on the designation
 * state kept in t->stateful[0].  That state is written by
 * fun_so_iso2022jp_to_eucjp: 0 after "ESC ( B" / "ESC ( J", 1 after
 * "ESC $ @" / "ESC $ B".
 *
 * t: transcoding state; only t->stateful[0] is read.
 * s: input bytes matched so far; only s[0] is examined.
 * l: number of matched bytes (not used here).
 *
 * Returns, cast to VALUE: NOMAP while in single-byte state; the address
 * of the iso2022jp_to_eucjp_jisx0208_rest node when s[0] is in
 * 0x21..0x7e in two-byte state (so that one more byte is consumed);
 * INVALID for any other byte in two-byte state.
 */
static VALUE
|
||||||
|
fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l)
|
||||||
|
{
|
||||||
|
if (t->stateful[0] == 0)
|
||||||
|
return (VALUE)NOMAP;
|
||||||
|
else if (0x21 <= s[0] && s[0] <= 0x7e)
|
||||||
|
return (VALUE)&iso2022jp_to_eucjp_jisx0208_rest;
|
||||||
|
else
|
||||||
|
return (VALUE)INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * func_so hook of the ISO-2022-JP -> EUC-JP transcoder.
 *
 * t: transcoding state; t->stateful[0] holds the current designation
 *    (0 = single-byte set, 1 = two-byte set).
 * s: the matched input sequence.
 * l: its length in bytes; s[l-1] is the final byte of an escape sequence.
 * o: output buffer; at most 2 bytes are written.
 *
 * When s starts with ESC (0x1b) the sequence only switches state and
 * produces no output (returns 0).  Otherwise s is treated as a two-byte
 * character: both bytes are emitted with the high bit set and 2 is
 * returned.
 */
static int
|
||||||
|
fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
|
{
|
||||||
|
if (s[0] == 0x1b) {
|
||||||
|
/* "ESC ( F": final byte B or J selects the single-byte state */
if (s[1] == '(') {
|
||||||
|
switch (s[l-1]) {
|
||||||
|
case 'B':
|
||||||
|
case 'J':
|
||||||
|
t->stateful[0] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* any other second byte: final byte @ or B selects the two-byte state */
else {
|
||||||
|
switch (s[l-1]) {
|
||||||
|
case '@':
|
||||||
|
case 'B':
|
||||||
|
t->stateful[0] = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* two 7-bit bytes -> same bytes with bit 7 set */
o[0] = s[0] | 0x80;
|
||||||
|
o[1] = s[1] | 0x80;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Transcoder descriptor for ISO-2022-JP -> EUC-JP, rooted at the
 * generated iso2022jp_to_eucjp node and registered by Init_iso2022.
 * The initializer is positional; the meaning of each slot is fixed by
 * rb_transcoder in transcode_data.h, which is not visible here.
 * NOTE(review): the two integers are presumably max_output and a
 * "from UTF-8" flag, and the four trailing slots presumably
 * func_ii, func_si, func_io, func_so -- confirm against the header.
 */
static const rb_transcoder
|
||||||
|
rb_ISO_2022_JP_to_EUC_JP = {
|
||||||
|
"ISO-2022-JP", "EUC-JP", &iso2022jp_to_eucjp, 3, 0,
|
||||||
|
NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Byte-pattern -> action map for the EUC-JP to ISO-2022-JP direction:
 * 0x0e, 0x0f and 0x1b are :undef; the remaining 7-bit bytes and two-byte
 * sequences of 0xa1..0xfe bytes go to func_so; sequences introduced by
 * 0x8e or 0x8f are :undef.
 */
<%
|
||||||
|
map_eucjp = {
|
||||||
|
"{0e,0f,1b}" => :undef,
|
||||||
|
"{00-0d,10-1a,1c-7f}" => :func_so,
|
||||||
|
"{a1-fe}{a1-fe}" => :func_so,
|
||||||
|
"8e{a1-fe}" => :undef,
|
||||||
|
"8f{a1-fe}{a1-fe}" => :undef,
|
||||||
|
}
|
||||||
|
%>
|
||||||
|
|
||||||
|
<%= transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp", []) %>
|
||||||
|
|
||||||
|
/*
 * func_so hook of the EUC-JP -> ISO-2022-JP transcoder: emits one
 * character, preceded by a designation escape sequence when the output
 * mode has to change.
 *
 * t: transcoding state.  t->stateful[0] is an "initialized" flag;
 *    t->stateful[1] is the current output mode (1 = ASCII, designated by
 *    "ESC ( B"; 2 = two-byte set, designated by "ESC $ B").
 * s: the matched input character.
 * l: its length in bytes (1 selects ASCII mode, anything else the
 *    two-byte mode).
 * o: output buffer; up to 5 bytes are written (3-byte escape + 2 bytes).
 *
 * Returns the number of bytes written to o.
 */
static int
|
||||||
|
fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
|
{
|
||||||
|
unsigned char *output0 = o;
|
||||||
|
|
||||||
|
/* first call: start out in ASCII mode */
if (t->stateful[0] == 0) {
|
||||||
|
t->stateful[0] = 1; /* initialized flag */
|
||||||
|
t->stateful[1] = 1; /* ASCII mode */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the mode value equals the byte length of characters in that mode,
 * so a length mismatch means a new designation must be emitted */
if (l != t->stateful[1]) {
|
||||||
|
if (l == 1) {
|
||||||
|
*o++ = 0x1b;
|
||||||
|
*o++ = '(';
|
||||||
|
*o++ = 'B';
|
||||||
|
t->stateful[1] = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*o++ = 0x1b;
|
||||||
|
*o++ = '$';
|
||||||
|
*o++ = 'B';
|
||||||
|
t->stateful[1] = 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy the character itself with bit 7 cleared */
if (l == 1) {
|
||||||
|
*o++ = s[0] & 0x7f;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*o++ = s[0] & 0x7f;
|
||||||
|
*o++ = s[1] & 0x7f;
|
||||||
|
}
|
||||||
|
|
||||||
|
return o - output0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Finish hook of the EUC-JP -> ISO-2022-JP transcoder: returns the
 * output to ASCII mode.
 *
 * t: transcoding state (t->stateful[0] = initialized flag,
 *    t->stateful[1] = current mode, 1 meaning ASCII; both are set by
 *    fun_so_eucjp_to_iso2022jp).
 * o: output buffer; at most 3 bytes are written.
 *
 * Returns the number of bytes written: 0 if no character was ever
 * converted or the mode is already ASCII, otherwise 3 for "ESC ( B".
 */
static int
|
||||||
|
finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o)
|
||||||
|
{
|
||||||
|
unsigned char *output0 = o;
|
||||||
|
|
||||||
|
/* state never initialized: nothing was emitted, nothing to reset */
if (t->stateful[0] == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (t->stateful[1] != 1) {
|
||||||
|
*o++ = 0x1b;
|
||||||
|
*o++ = '(';
|
||||||
|
*o++ = 'B';
|
||||||
|
t->stateful[1] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return o - output0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Transcoder descriptor for EUC-JP -> ISO-2022-JP, rooted at the
 * generated eucjp_to_iso2022jp node and registered by Init_iso2022.
 * The initializer is positional; slot meanings come from rb_transcoder
 * in transcode_data.h, which is not visible here.
 * NOTE(review): 5 matches the most bytes fun_so_eucjp_to_iso2022jp can
 * write in one call (3-byte escape + 2 bytes), so it is presumably
 * max_output; the last slot is presumably finish_func -- confirm.
 */
static const rb_transcoder
|
||||||
|
rb_EUC_JP_to_ISO_2022_JP = {
|
||||||
|
"EUC-JP", "ISO-2022-JP", &eucjp_to_iso2022jp, 5, 0,
|
||||||
|
NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Library entry point: registers both transcoders defined in this file
 * (ISO-2022-JP -> EUC-JP and EUC-JP -> ISO-2022-JP).
 */
void
|
||||||
|
Init_iso2022(void)
|
||||||
|
{
|
||||||
|
rb_register_transcoder(&rb_ISO_2022_JP_to_EUC_JP);
|
||||||
|
rb_register_transcoder(&rb_EUC_JP_to_ISO_2022_JP);
|
||||||
|
}
|
||||||
|
|
@ -17,235 +17,8 @@
|
|||||||
<%= transcode_tblgen "UTF-8", "EUC-JP", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %>
|
<%= transcode_tblgen "UTF-8", "EUC-JP", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %>
|
||||||
<%= transcode_tblgen "UTF-8", "CP51932", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %>
|
<%= transcode_tblgen "UTF-8", "CP51932", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %>
|
||||||
|
|
||||||
#define ISO_2022_ENCODING(escseq, byte) ((escseq<<8)|byte)
|
|
||||||
enum ISO_2022_ESCSEQ {
|
|
||||||
ISO_2022_CZD = '!',
|
|
||||||
ISO_2022_C1D = '"',
|
|
||||||
ISO_2022_GZD4 = '(',
|
|
||||||
ISO_2022_G1D4 = ')',
|
|
||||||
ISO_2022_G2D4 = '*',
|
|
||||||
ISO_2022_G3D4 = '+',
|
|
||||||
ISO_2022_G1D6 = '-',
|
|
||||||
ISO_2022_G2D6 = '.',
|
|
||||||
ISO_2022_G3D6 = '/',
|
|
||||||
ISO_2022_GZDM4 = ISO_2022_ENCODING('$','('),
|
|
||||||
ISO_2022_G1DM4 = ISO_2022_ENCODING('$',')'),
|
|
||||||
ISO_2022_G2DM4 = ISO_2022_ENCODING('$','*'),
|
|
||||||
ISO_2022_G3DM4 = ISO_2022_ENCODING('$','+'),
|
|
||||||
ISO_2022_G1DM6 = ISO_2022_ENCODING('$','-'),
|
|
||||||
ISO_2022_G2DM6 = ISO_2022_ENCODING('$','.'),
|
|
||||||
ISO_2022_G3DM6 = ISO_2022_ENCODING('$','/'),
|
|
||||||
ISO_2022_DOCS = ISO_2022_ENCODING('%','I'),
|
|
||||||
ISO_2022_IRR = '&'
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#define ISO_2022_GZ_ASCII ISO_2022_ENCODING(ISO_2022_GZD4, 'B')
|
|
||||||
#define ISO_2022_GZ_JIS_X_0201_Katakana ISO_2022_ENCODING(ISO_2022_GZD4, 'I')
|
|
||||||
#define ISO_2022_GZ_JIS_X_0201_Roman ISO_2022_ENCODING(ISO_2022_GZD4, 'J')
|
|
||||||
#define ISO_2022_GZ_JIS_C_6226_1978 ISO_2022_ENCODING(ISO_2022_GZDM4,'@')
|
|
||||||
#define ISO_2022_GZ_JIS_X_0208_1983 ISO_2022_ENCODING(ISO_2022_GZDM4,'B')
|
|
||||||
#define ISO_2022_GZ_JIS_X_0212_1990 ISO_2022_ENCODING(ISO_2022_GZDM4,'D')
|
|
||||||
#define ISO_2022_GZ_JIS_X_0213_2000_1 ISO_2022_ENCODING(ISO_2022_GZDM4,'O')
|
|
||||||
#define ISO_2022_GZ_JIS_X_0213_2000_2 ISO_2022_ENCODING(ISO_2022_GZDM4,'P')
|
|
||||||
#define ISO_2022_GZ_JIS_X_0213_2004_1 ISO_2022_ENCODING(ISO_2022_GZDM4,'Q')
|
|
||||||
|
|
||||||
#define UNSUPPORTED_MODE TRANSCODE_ERROR
|
|
||||||
|
|
||||||
static int
|
|
||||||
get_iso_2022_mode(const unsigned char **in_pos)
|
|
||||||
{
|
|
||||||
int new_mode;
|
|
||||||
const unsigned char *in_p = *in_pos;
|
|
||||||
switch (*in_p++) {
|
|
||||||
case '(':
|
|
||||||
switch (*in_p++) {
|
|
||||||
case 'B': case 'I': case 'J':
|
|
||||||
new_mode = ISO_2022_ENCODING(ISO_2022_GZD4, *(in_p-1));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC ( %c)", *(in_p-1));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case '$':
|
|
||||||
switch (*in_p++) {
|
|
||||||
case '@': case 'A': case 'B':
|
|
||||||
new_mode = ISO_2022_ENCODING(ISO_2022_GZDM4, *(in_p-1));
|
|
||||||
break;
|
|
||||||
case '(':
|
|
||||||
switch (*in_p++) {
|
|
||||||
case 'D': case 'O': case 'P': case 'Q':
|
|
||||||
new_mode = ISO_2022_ENCODING(ISO_2022_GZDM4, *(in_p-1));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC $ ( %c)", *(in_p-1));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC $ %c)", *(in_p-1));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported (ESC %c)", *(in_p-1));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
*in_pos = in_p;
|
|
||||||
return new_mode;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
from_iso_2022_jp_transcoder_preprocessor(const unsigned char **in_pos, unsigned char **out_pos,
|
|
||||||
const unsigned char *in_stop, unsigned char *out_stop,
|
|
||||||
rb_transcoding *my_transcoding)
|
|
||||||
{
|
|
||||||
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
|
|
||||||
const unsigned char *in_p = *in_pos;
|
|
||||||
unsigned char *out_p = *out_pos;
|
|
||||||
int cur_mode = ISO_2022_GZ_ASCII;
|
|
||||||
unsigned char c1;
|
|
||||||
unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
|
|
||||||
while (in_p < in_stop) {
|
|
||||||
if (out_p >= out_s) {
|
|
||||||
int len = (out_p - *out_pos);
|
|
||||||
int new_len = (len + my_transcoder->max_output) * 2;
|
|
||||||
*out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
|
|
||||||
out_p = *out_pos + len;
|
|
||||||
out_s = *out_pos + new_len - my_transcoder->max_output;
|
|
||||||
}
|
|
||||||
c1 = *in_p++;
|
|
||||||
if (c1 == 0x1B) {
|
|
||||||
cur_mode = get_iso_2022_mode(&in_p);
|
|
||||||
}
|
|
||||||
else if (c1 == 0x1E || c1 == 0x1F) {
|
|
||||||
/* SHIFT */
|
|
||||||
rb_raise(UNSUPPORTED_MODE, "shift is not supported");
|
|
||||||
}
|
|
||||||
else if (c1 >= 0x80) {
|
|
||||||
rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
switch (cur_mode) {
|
|
||||||
case ISO_2022_GZ_ASCII:
|
|
||||||
case ISO_2022_GZ_JIS_X_0201_Roman:
|
|
||||||
*out_p++ = c1;
|
|
||||||
break;
|
|
||||||
case ISO_2022_GZ_JIS_X_0201_Katakana:
|
|
||||||
*out_p++ = 0x8E;
|
|
||||||
*out_p++ = c1 | 0x80;
|
|
||||||
break;
|
|
||||||
case ISO_2022_GZ_JIS_X_0212_1990:
|
|
||||||
*out_p++ = 0x8F;
|
|
||||||
case ISO_2022_GZ_JIS_C_6226_1978:
|
|
||||||
case ISO_2022_GZ_JIS_X_0208_1983:
|
|
||||||
*out_p++ = c1 | 0x80;
|
|
||||||
*out_p++ = *in_p++ | 0x80;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* cleanup */
|
|
||||||
*in_pos = in_p;
|
|
||||||
*out_pos = out_p;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
select_iso_2022_mode(unsigned char **out_pos, int new_mode)
|
|
||||||
{
|
|
||||||
unsigned char *out_p = *out_pos;
|
|
||||||
*out_p++ = '\x1b';
|
|
||||||
switch (new_mode>>8) {
|
|
||||||
case ISO_2022_GZD4:
|
|
||||||
*out_p++ = new_mode >> 8;
|
|
||||||
*out_p++ = new_mode & 0x7F;
|
|
||||||
break;
|
|
||||||
case ISO_2022_GZDM4:
|
|
||||||
*out_p++ = new_mode >> 16;
|
|
||||||
if ((new_mode & 0x7F) != '@' &&
|
|
||||||
(new_mode & 0x7F) != 'A' &&
|
|
||||||
(new_mode & 0x7F) != 'B')
|
|
||||||
{
|
|
||||||
*out_p++ = (new_mode>>8) & 0x7F;
|
|
||||||
}
|
|
||||||
*out_p++ = new_mode & 0x7F;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
rb_raise(UNSUPPORTED_MODE, "this mode is not supported.");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
*out_pos = out_p;
|
|
||||||
return new_mode;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
to_iso_2022_jp_transcoder_postprocessor(const unsigned char **in_pos, unsigned char **out_pos,
|
|
||||||
const unsigned char *in_stop, unsigned char *out_stop,
|
|
||||||
rb_transcoding *my_transcoding)
|
|
||||||
{
|
|
||||||
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
|
|
||||||
const unsigned char *in_p = *in_pos;
|
|
||||||
unsigned char *out_p = *out_pos;
|
|
||||||
int cur_mode = ISO_2022_GZ_ASCII, new_mode = 0;
|
|
||||||
unsigned char next_byte;
|
|
||||||
unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
|
|
||||||
while (in_p < in_stop) {
|
|
||||||
if (out_p >= out_s) {
|
|
||||||
int len = (out_p - *out_pos);
|
|
||||||
int new_len = (len + my_transcoder->max_output) * 2;
|
|
||||||
*out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
|
|
||||||
out_p = *out_pos + len;
|
|
||||||
out_s = *out_pos + new_len - my_transcoder->max_output;
|
|
||||||
}
|
|
||||||
next_byte = *in_p++;
|
|
||||||
if (next_byte < 0x80) {
|
|
||||||
new_mode = ISO_2022_GZ_ASCII;
|
|
||||||
}
|
|
||||||
else if (next_byte == 0x8E) {
|
|
||||||
new_mode = ISO_2022_GZ_JIS_X_0201_Katakana;
|
|
||||||
next_byte = *in_p++;
|
|
||||||
}
|
|
||||||
else if (next_byte == 0x8F) {
|
|
||||||
new_mode = ISO_2022_GZ_JIS_X_0212_1990;
|
|
||||||
next_byte = *in_p++;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
new_mode = ISO_2022_GZ_JIS_X_0208_1983;
|
|
||||||
}
|
|
||||||
if (cur_mode != new_mode)
|
|
||||||
cur_mode = select_iso_2022_mode(&out_p, new_mode);
|
|
||||||
if (cur_mode < 0xFFFF) {
|
|
||||||
*out_p++ = next_byte & 0x7F;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
*out_p++ = next_byte & 0x7F;
|
|
||||||
*out_p++ = *in_p++ & 0x7F;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (cur_mode != ISO_2022_GZ_ASCII)
|
|
||||||
cur_mode = select_iso_2022_mode(&out_p, ISO_2022_GZ_ASCII);
|
|
||||||
/* cleanup */
|
|
||||||
*in_pos = in_p;
|
|
||||||
*out_pos = out_p;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const rb_transcoder
|
|
||||||
rb_from_ISO_2022_JP = {
|
|
||||||
"ISO-2022-JP", "UTF-8", &from_EUC_JP, 8, 0,
|
|
||||||
&from_iso_2022_jp_transcoder_preprocessor, NULL,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const rb_transcoder
|
|
||||||
rb_to_ISO_2022_JP = {
|
|
||||||
"UTF-8", "ISO-2022-JP", &to_EUC_JP, 8, 1,
|
|
||||||
NULL, &to_iso_2022_jp_transcoder_postprocessor,
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
void
|
||||||
Init_japanese(void)
|
Init_japanese(void)
|
||||||
{
|
{
|
||||||
<%= transcode_register_code %>
|
<%= transcode_register_code %>
|
||||||
rb_register_transcoder(&rb_from_ISO_2022_JP);
|
|
||||||
rb_register_transcoder(&rb_to_ISO_2022_JP);
|
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#include "transcode_data.h"
|
#include "transcode_data.h"
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_16be(const unsigned char* s, unsigned char* o)
|
fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
if (!s[0] && s[1]<0x80) {
|
if (!s[0] && s[1]<0x80) {
|
||||||
o[0] = s[1];
|
o[0] = s[1];
|
||||||
@ -29,7 +29,7 @@ fun_so_from_utf_16be(const unsigned char* s, unsigned char* o)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_16be(const unsigned char* s, unsigned char* o)
|
fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
if (!(s[0]&0x80)) {
|
if (!(s[0]&0x80)) {
|
||||||
o[0] = 0x00;
|
o[0] = 0x00;
|
||||||
@ -57,7 +57,7 @@ fun_so_to_utf_16be(const unsigned char* s, unsigned char* o)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_16le(const unsigned char* s, unsigned char* o)
|
fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
if (!s[1] && s[0]<0x80) {
|
if (!s[1] && s[0]<0x80) {
|
||||||
o[0] = s[0];
|
o[0] = s[0];
|
||||||
@ -85,7 +85,7 @@ fun_so_from_utf_16le(const unsigned char* s, unsigned char* o)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_16le(const unsigned char* s, unsigned char* o)
|
fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
if (!(s[0]&0x80)) {
|
if (!(s[0]&0x80)) {
|
||||||
o[1] = 0x00;
|
o[1] = 0x00;
|
||||||
@ -113,7 +113,7 @@ fun_so_to_utf_16le(const unsigned char* s, unsigned char* o)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_32be(const unsigned char* s, unsigned char* o)
|
fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
if (!s[1]) {
|
if (!s[1]) {
|
||||||
if (s[2]==0 && s[3]<0x80) {
|
if (s[2]==0 && s[3]<0x80) {
|
||||||
@ -142,7 +142,7 @@ fun_so_from_utf_32be(const unsigned char* s, unsigned char* o)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_32be(const unsigned char* s, unsigned char* o)
|
fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
o[0] = 0;
|
o[0] = 0;
|
||||||
if (!(s[0]&0x80)) {
|
if (!(s[0]&0x80)) {
|
||||||
@ -168,13 +168,13 @@ fun_so_to_utf_32be(const unsigned char* s, unsigned char* o)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_32le(const unsigned char* s, unsigned char* o)
|
fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_32le(const unsigned char* s, unsigned char* o)
|
fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
@ -191,7 +191,7 @@ fun_so_to_utf_32le(const unsigned char* s, unsigned char* o)
|
|||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_from_UTF_16BE = {
|
rb_from_UTF_16BE = {
|
||||||
"UTF-16BE", "UTF-8", &from_UTF_16BE, 4, 0,
|
"UTF-16BE", "UTF-8", &from_UTF_16BE, 4, 0,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_16be
|
NULL, NULL, NULL, &fun_so_from_utf_16be
|
||||||
};
|
};
|
||||||
|
|
||||||
<%=
|
<%=
|
||||||
@ -217,7 +217,7 @@ rb_from_UTF_16BE = {
|
|||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_to_UTF_16BE = {
|
rb_to_UTF_16BE = {
|
||||||
"UTF-8", "UTF-16BE", &to_UTF_16BE, 4, 1,
|
"UTF-8", "UTF-16BE", &to_UTF_16BE, 4, 1,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_16be
|
NULL, NULL, NULL, &fun_so_to_utf_16be
|
||||||
};
|
};
|
||||||
|
|
||||||
<%=
|
<%=
|
||||||
@ -232,13 +232,13 @@ rb_to_UTF_16BE = {
|
|||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_from_UTF_16LE = {
|
rb_from_UTF_16LE = {
|
||||||
"UTF-16LE", "UTF-8", &from_UTF_16LE, 4, 0,
|
"UTF-16LE", "UTF-8", &from_UTF_16LE, 4, 0,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_16le
|
NULL, NULL, NULL, &fun_so_from_utf_16le
|
||||||
};
|
};
|
||||||
|
|
||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_to_UTF_16LE = {
|
rb_to_UTF_16LE = {
|
||||||
"UTF-8", "UTF-16LE", &to_UTF_16BE, 4, 1,
|
"UTF-8", "UTF-16LE", &to_UTF_16BE, 4, 1,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_16le
|
NULL, NULL, NULL, &fun_so_to_utf_16le
|
||||||
};
|
};
|
||||||
|
|
||||||
<%=
|
<%=
|
||||||
@ -254,13 +254,13 @@ rb_to_UTF_16LE = {
|
|||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_from_UTF_32BE = {
|
rb_from_UTF_32BE = {
|
||||||
"UTF-32BE", "UTF-8", &from_UTF_32BE, 4, 0,
|
"UTF-32BE", "UTF-8", &from_UTF_32BE, 4, 0,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_32be
|
NULL, NULL, NULL, &fun_so_from_utf_32be
|
||||||
};
|
};
|
||||||
|
|
||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_to_UTF_32BE = {
|
rb_to_UTF_32BE = {
|
||||||
"UTF-8", "UTF-32BE", &to_UTF_16BE, 4, 1,
|
"UTF-8", "UTF-32BE", &to_UTF_16BE, 4, 1,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_32be
|
NULL, NULL, NULL, &fun_so_to_utf_32be
|
||||||
};
|
};
|
||||||
|
|
||||||
<%=
|
<%=
|
||||||
@ -276,13 +276,13 @@ rb_to_UTF_32BE = {
|
|||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_from_UTF_32LE = {
|
rb_from_UTF_32LE = {
|
||||||
"UTF-32LE", "UTF-8", &from_UTF_32LE, 4, 0,
|
"UTF-32LE", "UTF-8", &from_UTF_32LE, 4, 0,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_from_utf_32le
|
NULL, NULL, NULL, &fun_so_from_utf_32le
|
||||||
};
|
};
|
||||||
|
|
||||||
static const rb_transcoder
|
static const rb_transcoder
|
||||||
rb_to_UTF_32LE = {
|
rb_to_UTF_32LE = {
|
||||||
"UTF-8", "UTF-32LE", &to_UTF_16BE, 4, 1,
|
"UTF-8", "UTF-32LE", &to_UTF_16BE, 4, 1,
|
||||||
NULL, NULL, NULL, NULL, NULL, &fun_so_to_utf_32le
|
NULL, NULL, NULL, &fun_so_to_utf_32le
|
||||||
};
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -321,12 +321,13 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
assert_raise(RuntimeError) { "\x1b(A".encode("utf-8", "iso-2022-jp") }
|
assert_raise(RuntimeError) { "\x1b(A".encode("utf-8", "iso-2022-jp") }
|
||||||
assert_raise(RuntimeError) { "\x1b$(A".encode("utf-8", "iso-2022-jp") }
|
assert_raise(RuntimeError) { "\x1b$(A".encode("utf-8", "iso-2022-jp") }
|
||||||
assert_raise(RuntimeError) { "\x1b$C".encode("utf-8", "iso-2022-jp") }
|
assert_raise(RuntimeError) { "\x1b$C".encode("utf-8", "iso-2022-jp") }
|
||||||
assert_raise(RuntimeError) { "\x1e".encode("utf-8", "iso-2022-jp") }
|
assert_raise(RuntimeError) { "\x0e".encode("utf-8", "iso-2022-jp") }
|
||||||
assert_raise(RuntimeError) { "\x80".encode("utf-8", "iso-2022-jp") }
|
assert_raise(RuntimeError) { "\x80".encode("utf-8", "iso-2022-jp") }
|
||||||
assert_raise(RuntimeError) { "\x1b$(Dd!\x1b(B".encode("utf-8", "iso-2022-jp") }
|
assert_raise(RuntimeError) { "\x1b$(Dd!\x1b(B".encode("utf-8", "iso-2022-jp") }
|
||||||
assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") }
|
assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") }
|
||||||
#@@@@ TODO: the next test should actually fail, because iso-2022-jp does not include half-width kana
|
assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") }
|
||||||
check_both_ways("\uff71\uff72\uff73\uff74\uff75", "\x1b(I12345\x1b(B", "iso-2022-jp") # JIS X 0201 ァィゥェォ
|
assert_raise(RuntimeError) { "\uff71\uff72\uff73\uff74\uff75".encode("iso-2022-jp") }
|
||||||
|
assert_raise(RuntimeError) { "\x1b(I12345\x1b(B".encode("utf-8", "iso-2022-jp") }
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_iso_2022_jp_1
|
def test_iso_2022_jp_1
|
||||||
|
@ -234,6 +234,12 @@ class ActionMap
|
|||||||
"UNDEF"
|
"UNDEF"
|
||||||
when :invalid
|
when :invalid
|
||||||
"INVALID"
|
"INVALID"
|
||||||
|
when :func_ii
|
||||||
|
"FUNii"
|
||||||
|
when :func_si
|
||||||
|
"FUNsi"
|
||||||
|
when :func_io
|
||||||
|
"FUNio"
|
||||||
when :func_so
|
when :func_so
|
||||||
"FUNso"
|
"FUNso"
|
||||||
when /\A([0-9a-f][0-9a-f])\z/i
|
when /\A([0-9a-f][0-9a-f])\z/i
|
||||||
|
308
transcode.c
308
transcode.c
@ -25,53 +25,78 @@ static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
|
|||||||
* Dispatch data and logic
|
* Dispatch data and logic
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static st_table *transcoder_table, *transcoder_lib_table;
|
typedef struct {
|
||||||
|
const char *from;
|
||||||
|
const char *to;
|
||||||
|
const char *lib; /* maybe null. it means that don't load the library. */
|
||||||
|
const rb_transcoder *transcoder;
|
||||||
|
} transcoder_entry_t;
|
||||||
|
|
||||||
#define TRANSCODER_INTERNAL_SEPARATOR '\t'
|
static st_table *transcoder_table;
|
||||||
|
|
||||||
static char *
|
static transcoder_entry_t *
|
||||||
transcoder_key(const char *from_e, const char *to_e)
|
make_transcoder_entry(const char *from, const char *to)
|
||||||
{
|
{
|
||||||
int to_len = strlen(to_e);
|
st_data_t val;
|
||||||
int from_len = strlen(from_e);
|
st_table *table2;
|
||||||
char *const key = xmalloc(to_len + from_len + 2);
|
|
||||||
|
|
||||||
memcpy(key, to_e, to_len);
|
if (!st_lookup(transcoder_table, (st_data_t)from, &val)) {
|
||||||
memcpy(key + to_len + 1, from_e, from_len + 1);
|
val = (st_data_t)st_init_strcasetable();
|
||||||
key[to_len] = TRANSCODER_INTERNAL_SEPARATOR;
|
st_add_direct(transcoder_table, (st_data_t)from, val);
|
||||||
return key;
|
}
|
||||||
|
table2 = (st_table *)val;
|
||||||
|
if (!st_lookup(table2, (st_data_t)to, &val)) {
|
||||||
|
transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
|
||||||
|
entry->from = from;
|
||||||
|
entry->to = to;
|
||||||
|
entry->lib = NULL;
|
||||||
|
entry->transcoder = NULL;
|
||||||
|
val = (st_data_t)entry;
|
||||||
|
st_add_direct(table2, (st_data_t)to, val);
|
||||||
|
}
|
||||||
|
return (transcoder_entry_t *)val;
|
||||||
|
}
|
||||||
|
|
||||||
|
static transcoder_entry_t *
|
||||||
|
get_transcoder_entry(const char *from, const char *to)
|
||||||
|
{
|
||||||
|
st_data_t val;
|
||||||
|
st_table *table2;
|
||||||
|
|
||||||
|
if (!st_lookup(transcoder_table, (st_data_t)from, &val)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
table2 = (st_table *)val;
|
||||||
|
if (!st_lookup(table2, (st_data_t)to, &val)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return (transcoder_entry_t *)val;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
rb_register_transcoder(const rb_transcoder *tr)
|
rb_register_transcoder(const rb_transcoder *tr)
|
||||||
{
|
{
|
||||||
st_data_t k, val = 0;
|
|
||||||
const char *const from_e = tr->from_encoding;
|
const char *const from_e = tr->from_encoding;
|
||||||
const char *const to_e = tr->to_encoding;
|
const char *const to_e = tr->to_encoding;
|
||||||
char *const key = transcoder_key(from_e, to_e);
|
|
||||||
|
|
||||||
if (st_lookup(transcoder_table, (st_data_t)key, &val)) {
|
transcoder_entry_t *entry;
|
||||||
xfree(key);
|
|
||||||
|
entry = make_transcoder_entry(from_e, to_e);
|
||||||
|
if (entry->transcoder) {
|
||||||
rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
|
rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
|
||||||
from_e, to_e);
|
from_e, to_e);
|
||||||
}
|
}
|
||||||
k = (st_data_t)key;
|
|
||||||
if (st_delete(transcoder_lib_table, &k, &val)) {
|
entry->transcoder = tr;
|
||||||
xfree((char *)k);
|
|
||||||
}
|
|
||||||
st_insert(transcoder_table, (st_data_t)key, (st_data_t)tr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
declare_transcoder(const char *to, const char *from, const char *lib)
|
declare_transcoder(const char *to, const char *from, const char *lib)
|
||||||
{
|
{
|
||||||
const char *const key = transcoder_key(to, from);
|
transcoder_entry_t *entry;
|
||||||
st_data_t k = (st_data_t)key, val;
|
|
||||||
|
|
||||||
if (st_delete(transcoder_lib_table, &k, &val)) {
|
entry = make_transcoder_entry(from, to);
|
||||||
xfree((char *)k);
|
entry->lib = lib;
|
||||||
}
|
|
||||||
st_insert(transcoder_lib_table, (st_data_t)key, (st_data_t)lib);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_TRANSCODER_LIBNAME_LEN 64
|
#define MAX_TRANSCODER_LIBNAME_LEN 64
|
||||||
@ -90,38 +115,166 @@ rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
|
|||||||
|
|
||||||
#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
|
#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
|
||||||
|
|
||||||
|
/*
 * Node of the singly linked FIFO used by transcode_search_path:
 * one pending encoding name whose outgoing converters still have to be
 * examined.
 */
typedef struct search_path_queue_tag {
|
||||||
|
struct search_path_queue_tag *next;
|
||||||
|
const char *enc;
|
||||||
|
} search_path_queue_t;
|
||||||
|
|
||||||
|
/*
 * Working state of one breadth first search in transcode_search_path.
 */
typedef struct {
|
||||||
|
/* encoding name -> name of the encoding it was reached from
 * (NULL for the start encoding) */
st_table *visited;
|
||||||
|
/* head of the pending-encoding FIFO */
search_path_queue_t *queue;
|
||||||
|
/* address of the link where the next node is appended: the tail's
 * "next" field, or &queue when the FIFO is empty */
search_path_queue_t **queue_last_ptr;
|
||||||
|
/* encoding currently being expanded; recorded as predecessor of the
 * encodings discovered from it */
const char *base_enc;
|
||||||
|
} search_path_bfs_t;
|
||||||
|
|
||||||
|
/*
 * st_foreach callback used by transcode_search_path while expanding
 * bfs->base_enc: called once per destination encoding reachable from it
 * in one step.
 *
 * key: destination encoding name (const char *).
 * val: table value for that key; only reused as scratch for st_lookup.
 * arg: the search_path_bfs_t of the running search.
 *
 * A destination not seen before is appended to the FIFO and recorded in
 * bfs->visited with bfs->base_enc as its predecessor; one already seen
 * is skipped.  Always returns ST_CONTINUE.
 */
static int
|
||||||
|
transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
|
||||||
|
{
|
||||||
|
const char *to = (const char *)key;
|
||||||
|
search_path_bfs_t *bfs = (search_path_bfs_t *)arg;
|
||||||
|
search_path_queue_t *q;
|
||||||
|
|
||||||
|
if (st_lookup(bfs->visited, (st_data_t)to, &val)) {
|
||||||
|
return ST_CONTINUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* append a new node at the tail and advance the tail link */
q = ALLOC(search_path_queue_t);
|
||||||
|
q->enc = to;
|
||||||
|
q->next = NULL;
|
||||||
|
*bfs->queue_last_ptr = q;
|
||||||
|
bfs->queue_last_ptr = &q->next;
|
||||||
|
|
||||||
|
st_add_direct(bfs->visited, (st_data_t)to, (st_data_t)bfs->base_enc);
|
||||||
|
return ST_CONTINUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Breadth first search over transcoder_table for a chain of converters
 * leading from encoding "from" to encoding "to".
 *
 * from, to: encoding names.
 * callback: invoked once per step of the path that was found, as
 *           callback(step_from, step_to, depth, arg).  Steps are
 *           reported starting with the one that ends at "to" and
 *           walking back towards "from"; depth counts down so that the
 *           step leaving "from" gets depth 0.
 * arg:      passed through to callback unchanged.
 *
 * Returns 1 if a path was found (callback has been called for every
 * step), 0 otherwise (callback is not called).
 */
static int
|
||||||
|
transcode_search_path(const char *from, const char *to,
|
||||||
|
void (*callback)(const char *from, const char *to, int depth, void *arg),
|
||||||
|
void *arg)
|
||||||
|
{
|
||||||
|
search_path_bfs_t bfs;
|
||||||
|
search_path_queue_t *q;
|
||||||
|
st_data_t val;
|
||||||
|
st_table *table2;
|
||||||
|
int found;
|
||||||
|
|
||||||
|
/* seed the FIFO with the start encoding */
q = ALLOC(search_path_queue_t);
|
||||||
|
q->enc = from;
|
||||||
|
q->next = NULL;
|
||||||
|
bfs.queue_last_ptr = &q->next;
|
||||||
|
bfs.queue = q;
|
||||||
|
|
||||||
|
/* the start encoding has no predecessor; NULL ends the walk-back below */
bfs.visited = st_init_strcasetable();
|
||||||
|
st_add_direct(bfs.visited, (st_data_t)from, (st_data_t)NULL);
|
||||||
|
|
||||||
|
while (bfs.queue) {
|
||||||
|
/* pop the head; if the FIFO became empty, appends go to the head again */
q = bfs.queue;
|
||||||
|
bfs.queue = q->next;
|
||||||
|
if (!bfs.queue)
|
||||||
|
bfs.queue_last_ptr = &bfs.queue;
|
||||||
|
|
||||||
|
/* no converter starts at this encoding: dead end */
if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
|
||||||
|
xfree(q);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
table2 = (st_table *)val;
|
||||||
|
|
||||||
|
/* a direct converter to the target exists: record the last step and stop */
if (st_lookup(table2, (st_data_t)to, &val)) {
|
||||||
|
st_add_direct(bfs.visited, (st_data_t)to, (st_data_t)q->enc);
|
||||||
|
xfree(q);
|
||||||
|
found = 1;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* otherwise enqueue every not-yet-visited destination of this encoding */
bfs.base_enc = q->enc;
|
||||||
|
st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
|
||||||
|
bfs.base_enc = NULL;
|
||||||
|
|
||||||
|
xfree(q);
|
||||||
|
}
|
||||||
|
found = 0;
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
/* release queue nodes that were still pending */
while (bfs.queue) {
|
||||||
|
q = bfs.queue;
|
||||||
|
bfs.queue = q->next;
|
||||||
|
xfree(q);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (found) {
|
||||||
|
const char *enc = to;
|
||||||
|
int depth = 0;
|
||||||
|
/* first pass: count the steps by following predecessors back to "from".
 * NOTE(review): the st_lookup result is ignored in both passes; this
 * relies on every encoding on the path being present in visited. */
while (1) {
|
||||||
|
st_lookup(bfs.visited, (st_data_t)enc, &val);
|
||||||
|
if (!val)
|
||||||
|
break;
|
||||||
|
depth++;
|
||||||
|
enc = (const char *)val;
|
||||||
|
}
|
||||||
|
enc = to;
|
||||||
|
/* second pass: report each step, last step first */
while (1) {
|
||||||
|
st_lookup(bfs.visited, (st_data_t)enc, &val);
|
||||||
|
if (!val)
|
||||||
|
break;
|
||||||
|
callback((const char *)val, enc, --depth, arg);
|
||||||
|
enc = (const char *)val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
st_free_table(bfs.visited);
|
||||||
|
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
transcode_dispatch_cb(const char *from, const char *to, int depth, void *arg)
|
||||||
|
{
|
||||||
|
const rb_transcoder **first_transcoder_ptr = (const rb_transcoder **)arg;
|
||||||
|
|
||||||
|
transcoder_entry_t *entry;
|
||||||
|
|
||||||
|
if (!*first_transcoder_ptr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
entry = get_transcoder_entry(from, to);
|
||||||
|
if (!entry)
|
||||||
|
goto failed;
|
||||||
|
|
||||||
|
if (!entry->transcoder && entry->lib) {
|
||||||
|
const char *lib = entry->lib;
|
||||||
|
int len = strlen(lib);
|
||||||
|
char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
|
||||||
|
|
||||||
|
entry->lib = NULL;
|
||||||
|
|
||||||
|
if (len > MAX_TRANSCODER_LIBNAME_LEN) goto failed;
|
||||||
|
memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
|
||||||
|
memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1);
|
||||||
|
if (!rb_require(path)) goto failed;
|
||||||
|
}
|
||||||
|
if (!entry->transcoder)
|
||||||
|
goto failed;
|
||||||
|
|
||||||
|
if (depth == 0)
|
||||||
|
*first_transcoder_ptr = entry->transcoder;
|
||||||
|
|
||||||
|
return;
|
||||||
|
|
||||||
|
failed:
|
||||||
|
*first_transcoder_ptr = NULL;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
static const rb_transcoder *
|
static const rb_transcoder *
|
||||||
transcode_dispatch(const char *from_encoding, const char *to_encoding)
|
transcode_dispatch(const char *from_encoding, const char *to_encoding)
|
||||||
{
|
{
|
||||||
char *const key = transcoder_key(from_encoding, to_encoding);
|
const rb_transcoder *first_transcoder = (rb_transcoder *)1;
|
||||||
st_data_t k, val = 0;
|
|
||||||
|
|
||||||
while (!st_lookup(transcoder_table, (k = (st_data_t)key), &val) &&
|
if (transcode_search_path(from_encoding, to_encoding, transcode_dispatch_cb, (void *)&first_transcoder)) {
|
||||||
st_delete(transcoder_lib_table, &k, &val)) {
|
return first_transcoder;
|
||||||
const char *const lib = (const char *)val;
|
|
||||||
int len = strlen(lib);
|
|
||||||
char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
|
|
||||||
|
|
||||||
xfree((char *)k);
|
|
||||||
if (len > MAX_TRANSCODER_LIBNAME_LEN) return NULL;
|
|
||||||
memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
|
|
||||||
memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1);
|
|
||||||
if (!rb_require(path)) return NULL;
|
|
||||||
}
|
}
|
||||||
if (!val) {
|
return NULL;
|
||||||
if (!st_lookup(transcoder_table, (st_data_t)key, &val)) {
|
|
||||||
xfree(key);
|
|
||||||
/* multistep logic, via UTF-8 */
|
|
||||||
if (!encoding_equal(from_encoding, "UTF-8") &&
|
|
||||||
!encoding_equal(to_encoding, "UTF-8") &&
|
|
||||||
transcode_dispatch("UTF-8", to_encoding)) { /* check that we have a second step */
|
|
||||||
return transcode_dispatch(from_encoding, "UTF-8"); /* return first step */
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
xfree(key);
|
|
||||||
return (rb_transcoder *)val;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -245,17 +398,17 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||||||
*out_p++ = getBT3(next_info);
|
*out_p++ = getBT3(next_info);
|
||||||
continue;
|
continue;
|
||||||
case FUNii:
|
case FUNii:
|
||||||
next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
|
next_info = (VALUE)(*my_transcoder->func_ii)(my_transcoding, next_info);
|
||||||
goto follow_info;
|
goto follow_info;
|
||||||
case FUNsi:
|
case FUNsi:
|
||||||
next_info = (VALUE)(*my_transcoder->func_si)(char_start);
|
next_info = (VALUE)(*my_transcoder->func_si)(my_transcoding, char_start, (size_t)(in_p-char_start));
|
||||||
goto follow_info;
|
goto follow_info;
|
||||||
break;
|
break;
|
||||||
case FUNio:
|
case FUNio:
|
||||||
out_p += (VALUE)(*my_transcoder->func_io)(next_info, out_p);
|
out_p += (VALUE)(*my_transcoder->func_io)(my_transcoding, next_info, out_p);
|
||||||
break;
|
break;
|
||||||
case FUNso:
|
case FUNso:
|
||||||
out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p);
|
out_p += (VALUE)(*my_transcoder->func_so)(my_transcoding, char_start, (size_t)(in_p-char_start), out_p);
|
||||||
break;
|
break;
|
||||||
case INVALID:
|
case INVALID:
|
||||||
goto invalid;
|
goto invalid;
|
||||||
@ -290,6 +443,16 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* cleanup */
|
/* cleanup */
|
||||||
|
if (my_transcoder->finish_func) {
|
||||||
|
if (out_p >= out_s) {
|
||||||
|
int len = (out_p - *out_pos);
|
||||||
|
int new_len = (len + my_transcoder->max_output) * 2;
|
||||||
|
*out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
|
||||||
|
out_p = *out_pos + len;
|
||||||
|
out_s = *out_pos + new_len - my_transcoder->max_output;
|
||||||
|
}
|
||||||
|
out_p += my_transcoder->finish_func(my_transcoding, out_p);
|
||||||
|
}
|
||||||
*in_pos = in_p;
|
*in_pos = in_p;
|
||||||
*out_pos = out_p;
|
*out_pos = out_p;
|
||||||
}
|
}
|
||||||
@ -401,21 +564,8 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||||||
}
|
}
|
||||||
|
|
||||||
my_transcoding.transcoder = my_transcoder;
|
my_transcoding.transcoder = my_transcoder;
|
||||||
|
memset(my_transcoding.stateful, 0, sizeof(my_transcoding.stateful));
|
||||||
|
|
||||||
if (my_transcoder->preprocessor) {
|
|
||||||
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
|
||||||
slen = RSTRING_LEN(str);
|
|
||||||
blen = slen + 30; /* len + margin */
|
|
||||||
dest = rb_str_tmp_new(blen);
|
|
||||||
bp = (unsigned char *)RSTRING_PTR(dest);
|
|
||||||
my_transcoding.ruby_string_dest = dest;
|
|
||||||
(*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
|
|
||||||
if (fromp != sp+slen) {
|
|
||||||
rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp);
|
|
||||||
}
|
|
||||||
rb_str_set_len(dest, (char *)bp - RSTRING_PTR(dest));
|
|
||||||
str = dest;
|
|
||||||
}
|
|
||||||
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
||||||
slen = RSTRING_LEN(str);
|
slen = RSTRING_LEN(str);
|
||||||
blen = slen + 30; /* len + margin */
|
blen = slen + 30; /* len + margin */
|
||||||
@ -431,21 +581,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||||||
buf = (unsigned char *)RSTRING_PTR(dest);
|
buf = (unsigned char *)RSTRING_PTR(dest);
|
||||||
*bp = '\0';
|
*bp = '\0';
|
||||||
rb_str_set_len(dest, bp - buf);
|
rb_str_set_len(dest, bp - buf);
|
||||||
if (my_transcoder->postprocessor) {
|
|
||||||
str = dest;
|
|
||||||
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
|
||||||
slen = RSTRING_LEN(str);
|
|
||||||
blen = slen + 30; /* len + margin */
|
|
||||||
dest = rb_str_tmp_new(blen);
|
|
||||||
bp = (unsigned char *)RSTRING_PTR(dest);
|
|
||||||
my_transcoding.ruby_string_dest = dest;
|
|
||||||
(*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
|
|
||||||
if (fromp != sp+slen) {
|
|
||||||
rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp);
|
|
||||||
}
|
|
||||||
buf = (unsigned char *)RSTRING_PTR(dest);
|
|
||||||
rb_str_set_len(dest, bp - buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (encoding_equal(my_transcoder->to_encoding, to_e)) {
|
if (encoding_equal(my_transcoder->to_encoding, to_e)) {
|
||||||
final_encoding = 1;
|
final_encoding = 1;
|
||||||
@ -541,7 +676,6 @@ void
|
|||||||
Init_transcode(void)
|
Init_transcode(void)
|
||||||
{
|
{
|
||||||
transcoder_table = st_init_strcasetable();
|
transcoder_table = st_init_strcasetable();
|
||||||
transcoder_lib_table = st_init_strcasetable();
|
|
||||||
|
|
||||||
sym_invalid = ID2SYM(rb_intern("invalid"));
|
sym_invalid = ID2SYM(rb_intern("invalid"));
|
||||||
sym_undef = ID2SYM(rb_intern("undef"));
|
sym_undef = ID2SYM(rb_intern("undef"));
|
||||||
|
@ -63,6 +63,8 @@ typedef struct rb_transcoding {
|
|||||||
VALUE ruby_string_dest; /* the String used as the conversion destination,
|
VALUE ruby_string_dest; /* the String used as the conversion destination,
|
||||||
or NULL if something else is being converted */
|
or NULL if something else is being converted */
|
||||||
unsigned char *(*flush_func)(struct rb_transcoding*, int, int);
|
unsigned char *(*flush_func)(struct rb_transcoding*, int, int);
|
||||||
|
|
||||||
|
unsigned char stateful[256]; /* opaque data for stateful encoding */
|
||||||
} rb_transcoding;
|
} rb_transcoding;
|
||||||
|
|
||||||
/* static structure, one per supported encoding pair */
|
/* static structure, one per supported encoding pair */
|
||||||
@ -72,12 +74,11 @@ typedef struct rb_transcoder {
|
|||||||
const BYTE_LOOKUP *conv_tree_start;
|
const BYTE_LOOKUP *conv_tree_start;
|
||||||
int max_output;
|
int max_output;
|
||||||
int from_utf8;
|
int from_utf8;
|
||||||
void (*preprocessor)(const unsigned char**, unsigned char**, const unsigned char*, unsigned char*, struct rb_transcoding *);
|
VALUE (*func_ii)(rb_transcoding*, VALUE); /* info -> info */
|
||||||
void (*postprocessor)(const unsigned char**, unsigned char**, const unsigned char*, unsigned char*, struct rb_transcoding *);
|
VALUE (*func_si)(rb_transcoding*, const unsigned char*, size_t); /* start -> info */
|
||||||
VALUE (*func_ii)(VALUE); /* info -> info */
|
int (*func_io)(rb_transcoding*, VALUE, const unsigned char*); /* info -> output */
|
||||||
VALUE (*func_si)(const unsigned char *); /* start -> info */
|
int (*func_so)(rb_transcoding*, const unsigned char*, size_t, unsigned char*); /* start -> output */
|
||||||
int (*func_io)(VALUE, const unsigned char*); /* info -> output */
|
int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
|
||||||
int (*func_so)(const unsigned char*, unsigned char*); /* start -> output */
|
|
||||||
} rb_transcoder;
|
} rb_transcoder;
|
||||||
|
|
||||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user