* transcode.c (transcode_loop): insert output the value when
  fallback hash has a related key. [ruby-dev:40540] [ruby-dev:40829] #3036

* transcode.c (rb_econv_prepare_opts): pass to newhash a value with the key :fallback.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27326 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
9afaef347c
commit
c871aee96b
@ -1,3 +1,12 @@
|
|||||||
|
Tue Apr 13 09:32:12 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* transcode.c (transcode_loop): insert output the value when
|
||||||
|
fallback hash has a related key. [ruby-dev:40540]
|
||||||
|
[ruby-dev:40829] #3036
|
||||||
|
|
||||||
|
* transcode.c (rb_econv_prepare_opts): pass to newhash
|
||||||
|
a value with the key :fallback.
|
||||||
|
|
||||||
Tue Apr 13 00:12:04 2010 Tanaka Akira <akr@fsij.org>
|
Tue Apr 13 00:12:04 2010 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
* random.c (rand_init): use the absolute value of seed to
|
* random.c (rand_init): use the absolute value of seed to
|
||||||
|
@ -1892,8 +1892,7 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5-HKSCS') # 神林義博
|
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5-HKSCS') # 神林義博
|
||||||
end
|
end
|
||||||
|
|
||||||
def
|
def test_Big5_UAO
|
||||||
test_Big5_UAO
|
|
||||||
check_both_ways("\u4e17", "\x81\x40", 'Big5-UAO') # 丗
|
check_both_ways("\u4e17", "\x81\x40", 'Big5-UAO') # 丗
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -1903,4 +1902,13 @@ class TestTranscode < Test::Unit::TestCase
|
|||||||
assert_equal(Encoding::US_ASCII, a.encoding)
|
assert_equal(Encoding::US_ASCII, a.encoding)
|
||||||
assert_equal(Encoding::Shift_JIS, b.encoding)
|
assert_equal(Encoding::Shift_JIS, b.encoding)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_fallback
|
||||||
|
assert_equal("\u3042".encode("EUC-JP"), "\u{20000}".encode("EUC-JP",
|
||||||
|
fallback: {"\u{20000}" => "\u3042".encode("EUC-JP")}))
|
||||||
|
assert_equal("\u3042".encode("EUC-JP"), "\u{20000}".encode("EUC-JP",
|
||||||
|
fallback: {"\u{20000}" => "\u3042"}))
|
||||||
|
assert_equal("[ISU]", "\u{1F4BA}".encode("SJIS-KDDI",
|
||||||
|
fallback: {"\u{1F4BA}" => "[ISU]"}))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
39
transcode.c
39
transcode.c
@ -21,7 +21,7 @@ VALUE rb_eConverterNotFoundError;
|
|||||||
|
|
||||||
VALUE rb_cEncodingConverter;
|
VALUE rb_cEncodingConverter;
|
||||||
|
|
||||||
static VALUE sym_invalid, sym_undef, sym_replace;
|
static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
|
||||||
static VALUE sym_xml, sym_text, sym_attr;
|
static VALUE sym_xml, sym_text, sym_attr;
|
||||||
static VALUE sym_universal_newline;
|
static VALUE sym_universal_newline;
|
||||||
static VALUE sym_crlf_newline;
|
static VALUE sym_crlf_newline;
|
||||||
@ -2256,17 +2256,37 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||||||
unsigned char *out_start = *out_pos;
|
unsigned char *out_start = *out_pos;
|
||||||
int max_output;
|
int max_output;
|
||||||
VALUE exc;
|
VALUE exc;
|
||||||
|
VALUE fallback = Qnil;
|
||||||
|
|
||||||
ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
|
ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
|
||||||
if (!ec)
|
if (!ec)
|
||||||
rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
|
rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
|
||||||
|
|
||||||
|
if (!NIL_P(ecopts) && TYPE(ecopts) == T_HASH)
|
||||||
|
fallback = rb_hash_aref(ecopts, sym_fallback);
|
||||||
last_tc = ec->last_tc;
|
last_tc = ec->last_tc;
|
||||||
max_output = last_tc ? last_tc->transcoder->max_output : 1;
|
max_output = last_tc ? last_tc->transcoder->max_output : 1;
|
||||||
|
|
||||||
resume:
|
resume:
|
||||||
ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
|
ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
|
||||||
|
|
||||||
|
if (!NIL_P(fallback) && ret == econv_undefined_conversion) {
|
||||||
|
VALUE rep = rb_enc_str_new(
|
||||||
|
(const char *)ec->last_error.error_bytes_start,
|
||||||
|
ec->last_error.error_bytes_len,
|
||||||
|
rb_enc_find(ec->last_error.source_encoding));
|
||||||
|
rep = rb_hash_lookup2(fallback, rep, Qundef);
|
||||||
|
if (rep != Qundef) {
|
||||||
|
StringValue(rep);
|
||||||
|
ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
|
||||||
|
RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
|
||||||
|
if (ret == -1) {
|
||||||
|
rb_raise(rb_eArgError, "too big fallback string");
|
||||||
|
}
|
||||||
|
goto resume;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ret == econv_invalid_byte_sequence ||
|
if (ret == econv_invalid_byte_sequence ||
|
||||||
ret == econv_incomplete_input ||
|
ret == econv_incomplete_input ||
|
||||||
ret == econv_undefined_conversion) {
|
ret == econv_undefined_conversion) {
|
||||||
@ -2442,6 +2462,7 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
ecflags = econv_opts(opthash);
|
ecflags = econv_opts(opthash);
|
||||||
|
|
||||||
v = rb_hash_aref(opthash, sym_replace);
|
v = rb_hash_aref(opthash, sym_replace);
|
||||||
if (!NIL_P(v)) {
|
if (!NIL_P(v)) {
|
||||||
StringValue(v);
|
StringValue(v);
|
||||||
@ -2456,6 +2477,16 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
|
|||||||
rb_hash_aset(newhash, sym_replace, v);
|
rb_hash_aset(newhash, sym_replace, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
v = rb_hash_aref(opthash, sym_fallback);
|
||||||
|
if (!NIL_P(v)) {
|
||||||
|
v = rb_convert_type(v, T_HASH, "Hash", "to_hash");
|
||||||
|
if (!NIL_P(v)) {
|
||||||
|
if (NIL_P(newhash))
|
||||||
|
newhash = rb_hash_new();
|
||||||
|
rb_hash_aset(newhash, sym_fallback, v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!NIL_P(newhash))
|
if (!NIL_P(newhash))
|
||||||
rb_hash_freeze(newhash);
|
rb_hash_freeze(newhash);
|
||||||
*opts = newhash;
|
*opts = newhash;
|
||||||
@ -2728,6 +2759,11 @@ str_encode_bang(int argc, VALUE *argv, VALUE str)
|
|||||||
* :replace ::
|
* :replace ::
|
||||||
* Sets the replacement string to the value. The default replacement
|
* Sets the replacement string to the value. The default replacement
|
||||||
* string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
|
* string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
|
||||||
|
* :fallback ::
|
||||||
|
* Sets the replacement string from the given hash for undefined characters.
|
||||||
|
* Its key is such an undefined character encoded in the source encoding
|
||||||
|
* of the current transcoder. Its value can be in any encoding as long as it
|
||||||
|
* can be converted into the destination encoding of the transcoder.
|
||||||
* :xml ::
|
* :xml ::
|
||||||
* The value must be <code>:text</code> or <code>:attr</code>.
|
* The value must be <code>:text</code> or <code>:attr</code>.
|
||||||
* If the value is <code>:text</code> <code>#encode</code> replaces
|
* If the value is <code>:text</code> <code>#encode</code> replaces
|
||||||
@ -4193,6 +4229,7 @@ Init_transcode(void)
|
|||||||
sym_invalid = ID2SYM(rb_intern("invalid"));
|
sym_invalid = ID2SYM(rb_intern("invalid"));
|
||||||
sym_undef = ID2SYM(rb_intern("undef"));
|
sym_undef = ID2SYM(rb_intern("undef"));
|
||||||
sym_replace = ID2SYM(rb_intern("replace"));
|
sym_replace = ID2SYM(rb_intern("replace"));
|
||||||
|
sym_fallback = ID2SYM(rb_intern("fallback"));
|
||||||
sym_xml = ID2SYM(rb_intern("xml"));
|
sym_xml = ID2SYM(rb_intern("xml"));
|
||||||
sym_text = ID2SYM(rb_intern("text"));
|
sym_text = ID2SYM(rb_intern("text"));
|
||||||
sym_attr = ID2SYM(rb_intern("attr"));
|
sym_attr = ID2SYM(rb_intern("attr"));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user