* transcode.c (sym_universal_newline_decoder): new variable.

(sym_crlf_newline_encoder): ditto.
  (sym_cr_newline_encoder): ditto.
  (econv_opts): check newline converter options.
  (econv_init): make 3rd argument hash/nil only.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19130 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-09-04 12:08:54 +00:00
parent 4780af9566
commit f7607eff9a
3 changed files with 60 additions and 39 deletions

View File

@ -1,3 +1,11 @@
Thu Sep 4 21:04:27 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (sym_universal_newline_decoder): new variable.
(sym_crlf_newline_encoder): ditto.
(sym_cr_newline_encoder): ditto.
(econv_opts): check newline converter options.
(econv_init): make 3rd argument hash/nil only.
Thu Sep 4 21:03:28 2008 Koichi Sasada <ko1@atdot.net> Thu Sep 4 21:03:28 2008 Koichi Sasada <ko1@atdot.net>
* vm_dump.c: rename some debug functions. * vm_dump.c: rename some debug functions.

View File

@ -328,7 +328,7 @@ class TestEncodingConverter < Test::Unit::TestCase
end end
def test_universal_newline def test_universal_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline_decoder: true)
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a) src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a) src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
@ -339,7 +339,7 @@ class TestEncodingConverter < Test::Unit::TestCase
end end
def test_universal_newline2 def test_universal_newline2
ec = Encoding::Converter.new("", "", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) ec = Encoding::Converter.new("", "", universal_newline_decoder: true)
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a) src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a) src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
@ -350,22 +350,22 @@ class TestEncodingConverter < Test::Unit::TestCase
end end
def test_crlf_newline def test_crlf_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE_ENCODER) ec = Encoding::Converter.new("UTF-8", "EUC-JP", crlf_newline_encoder: true)
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "") assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
end end
def test_crlf_newline2 def test_crlf_newline2
ec = Encoding::Converter.new("", "", Encoding::Converter::CRLF_NEWLINE_ENCODER) ec = Encoding::Converter.new("", "", crlf_newline_encoder: true)
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "") assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
end end
def test_cr_newline def test_cr_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE_ENCODER) ec = Encoding::Converter.new("UTF-8", "EUC-JP", cr_newline_encoder: true)
assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "") assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
end end
def test_cr_newline2 def test_cr_newline2
ec = Encoding::Converter.new("", "", Encoding::Converter::CR_NEWLINE_ENCODER) ec = Encoding::Converter.new("", "", cr_newline_encoder: true)
assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "") assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
end end
@ -516,7 +516,7 @@ class TestEncodingConverter < Test::Unit::TestCase
end end
def test_invalid_replace def test_invalid_replace
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_REPLACE) ec = Encoding::Converter.new("UTF-8", "EUC-JP", invalid: :replace)
ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100) ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100)
assert_equal(:finished, ret) assert_equal(:finished, ret)
assert_equal("", src) assert_equal("", src)
@ -532,7 +532,7 @@ class TestEncodingConverter < Test::Unit::TestCase
end end
def test_undef_replace def test_undef_replace
ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_REPLACE) ec = Encoding::Converter.new("UTF-8", "EUC-JP", :undef => :replace)
ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100) ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100)
assert_equal(:finished, ret) assert_equal(:finished, ret)
assert_equal("", src) assert_equal("", src)
@ -650,7 +650,7 @@ class TestEncodingConverter < Test::Unit::TestCase
end end
def test_set_replacement def test_set_replacement
ec = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE) ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
ec.replacement = "<undef>" ec.replacement = "<undef>"
assert_equal("a <undef> b", ec.convert("a \u3042 b")) assert_equal("a <undef> b", ec.convert("a \u3042 b"))
end end

View File

@ -21,6 +21,9 @@ VALUE rb_eNoConverter;
VALUE rb_cEncodingConverter; VALUE rb_cEncodingConverter;
static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace; static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
static VALUE sym_universal_newline_decoder;
static VALUE sym_crlf_newline_encoder;
static VALUE sym_cr_newline_encoder;
static VALUE sym_invalid_byte_sequence; static VALUE sym_invalid_byte_sequence;
static VALUE sym_undefined_conversion; static VALUE sym_undefined_conversion;
@ -1998,27 +2001,42 @@ static int
econv_opts(VALUE opt) econv_opts(VALUE opt)
{ {
VALUE v; VALUE v;
int options = 0; int ecflags = 0;
v = rb_hash_aref(opt, sym_invalid); v = rb_hash_aref(opt, sym_invalid);
if (NIL_P(v)) { if (NIL_P(v)) {
} }
else if (v==sym_replace) { else if (v==sym_replace) {
options |= ECONV_INVALID_REPLACE; ecflags |= ECONV_INVALID_REPLACE;
v = rb_hash_aref(opt, sym_replace); v = rb_hash_aref(opt, sym_replace);
} }
else { else {
rb_raise(rb_eArgError, "unknown value for invalid character option"); rb_raise(rb_eArgError, "unknown value for invalid character option");
} }
v = rb_hash_aref(opt, sym_undef); v = rb_hash_aref(opt, sym_undef);
if (NIL_P(v)) { if (NIL_P(v)) {
} }
else if (v==sym_replace) { else if (v==sym_replace) {
options |= ECONV_UNDEF_REPLACE; ecflags |= ECONV_UNDEF_REPLACE;
} }
else { else {
rb_raise(rb_eArgError, "unknown value for undefined character option"); rb_raise(rb_eArgError, "unknown value for undefined character option");
} }
return options;
v = rb_hash_aref(opt, sym_universal_newline_decoder);
if (RTEST(v))
ecflags |= ECONV_UNIVERSAL_NEWLINE_DECODER;
v = rb_hash_aref(opt, sym_crlf_newline_encoder);
if (RTEST(v))
ecflags |= ECONV_CRLF_NEWLINE_ENCODER;
v = rb_hash_aref(opt, sym_cr_newline_encoder);
if (RTEST(v))
ecflags |= ECONV_CR_NEWLINE_ENCODER;
return ecflags;
} }
int int
@ -2307,12 +2325,17 @@ make_dummy_encoding(const char *name)
/* /*
* call-seq: * call-seq:
* Encoding::Converter.new(source_encoding, destination_encoding) * Encoding::Converter.new(source_encoding, destination_encoding)
* Encoding::Converter.new(source_encoding, destination_encoding, flags) * Encoding::Converter.new(source_encoding, destination_encoding, opthash)
* *
* possible flags: * possible opthash elements:
* Encoding::Converter::UNIVERSAL_NEWLINE_DECODER # convert CRLF and CR to LF at last * :universal_newline_decoder => true # convert CRLF and CR to LF at last
* Encoding::Converter::CRLF_NEWLINE_ENCODER # convert LF to CRLF at first * :crlf_newline_encoder => true # convert LF to CRLF at first
* Encoding::Converter::CR_NEWLINE_ENCODER # convert LF to CR at first * :cr_newline_encoder => true # convert LF to CR at first
* :invalid => nil # error on invalid byte sequence (default)
* :invalid => :replace # replace invalid byte sequence
* :undef => nil # error on undefined conversion (default)
* :undef => :replace # replace undefined conversion
* :replace => string # replacement string ("?" or "\uFFFD" if not specified)
* *
* Encoding::Converter.new creates an instance of Encoding::Converter. * Encoding::Converter.new creates an instance of Encoding::Converter.
* *
@ -2327,38 +2350,32 @@ make_dummy_encoding(const char *name)
* *
* # (1) convert UTF-16BE to UTF-8 * # (1) convert UTF-16BE to UTF-8
* # (2) convert CRLF and CR to LF * # (2) convert CRLF and CR to LF
* ec = Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline_decoder => true)
* *
* # (1) convert LF to CRLF * # (1) convert LF to CRLF
* # (2) convert UTF-8 to UTF-16BE * # (2) convert UTF-8 to UTF-16BE
* ec = Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_ENCODER) * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline_encoder => true)
* *
*/ */
static VALUE static VALUE
econv_init(int argc, VALUE *argv, VALUE self) econv_init(int argc, VALUE *argv, VALUE self)
{ {
VALUE source_encoding, destination_encoding, flags_v, opt, ecopts; VALUE source_encoding, destination_encoding, opthash, ecopts;
int sidx, didx; int sidx, didx;
const char *sname, *dname; const char *sname, *dname;
rb_encoding *senc, *denc; rb_encoding *senc, *denc;
rb_econv_t *ec; rb_econv_t *ec;
int ecflags; int ecflags;
rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &flags_v); rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &opthash);
if (flags_v == Qnil) { if (opthash == Qnil) {
ecflags = 0; ecflags = 0;
ecopts = Qnil; ecopts = Qnil;
} }
else { else {
opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash"); opthash = rb_convert_type(opthash, T_HASH, "Hash", "to_hash");
if (!NIL_P(opt)) { ecflags = rb_econv_prepare_opts(opthash, &ecopts);
ecflags = rb_econv_prepare_opts(opt, &ecopts);
}
else {
ecflags = NUM2INT(flags_v);
ecopts = Qnil;
}
} }
senc = NULL; senc = NULL;
@ -3073,7 +3090,7 @@ econv_get_replacement(VALUE self)
* *
* sets the replacement string. * sets the replacement string.
* *
* ec = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE) * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
* ec.replacement = "<undef>" * ec.replacement = "<undef>"
* p ec.convert("a \u3042 b") #=> "a <undef> b" * p ec.convert("a \u3042 b") #=> "a <undef> b"
*/ */
@ -3281,6 +3298,9 @@ Init_transcode(void)
sym_finished = ID2SYM(rb_intern("finished")); sym_finished = ID2SYM(rb_intern("finished"));
sym_output_followed_by_input = ID2SYM(rb_intern("output_followed_by_input")); sym_output_followed_by_input = ID2SYM(rb_intern("output_followed_by_input"));
sym_incomplete_input = ID2SYM(rb_intern("incomplete_input")); sym_incomplete_input = ID2SYM(rb_intern("incomplete_input"));
sym_universal_newline_decoder = ID2SYM(rb_intern("universal_newline_decoder"));
sym_crlf_newline_encoder = ID2SYM(rb_intern("crlf_newline_encoder"));
sym_cr_newline_encoder = ID2SYM(rb_intern("cr_newline_encoder"));
rb_define_method(rb_cString, "encode", str_encode, -1); rb_define_method(rb_cString, "encode", str_encode, -1);
rb_define_method(rb_cString, "encode!", str_encode_bang, -1); rb_define_method(rb_cString, "encode!", str_encode_bang, -1);
@ -3300,15 +3320,8 @@ Init_transcode(void)
rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0); rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0); rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1); rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);
rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK));
rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE));
rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT)); rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT));
rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT)); rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT));
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER));
rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER));
rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER));
rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0); rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0);
rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0); rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0);