From 8a269f85a0e6aaaecfdadcd185df7a686b45696e Mon Sep 17 00:00:00 2001 From: naruse Date: Tue, 20 Apr 2010 18:19:01 +0000 Subject: [PATCH] * io.c (io_getc): set read_encoding to resulted one character string. [ruby-dev:41023] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27426 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++++ io.c | 35 +++++++++++++++++++++++------------ test/ruby/test_io_m17n.rb | 15 +++++++++++++++ 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 05f107f3f3..11020e775c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Wed Apr 21 03:17:17 2010 NARUSE, Yui + + * io.c (io_getc): set read_encoding to resulted one character + string. [ruby-dev:41023] + Wed Apr 21 00:29:39 2010 Yusuke Endoh * bignum.c (bigmul1_karatsuba): fix calculation order to prevent diff --git a/io.c b/io.c index e78c67863e..d74492a73e 100644 --- a/io.c +++ b/io.c @@ -2739,17 +2739,15 @@ io_getc(rb_io_t *fptr, rb_encoding *enc) if (NEED_READCONV(fptr)) { VALUE str = Qnil; + rb_encoding *read_enc = io_read_encoding(fptr); make_readconv(fptr, 0); while (1) { if (fptr->cbuf_len) { - if (fptr->encs.enc) - r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off, - fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len, - fptr->encs.enc); - else - r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); + r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off, + fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len, + read_enc); if (!MBCLEN_NEEDMORE_P(r)) break; if (fptr->cbuf_len == fptr->cbuf_capa) { @@ -2761,17 +2759,30 @@ io_getc(rb_io_t *fptr, rb_encoding *enc) clear_readconv(fptr); if (fptr->cbuf_len == 0) return Qnil; - /* return an incomplete character just before EOF */ - return io_shift_cbuf(fptr, fptr->cbuf_len, &str); + /* return an unit of an incomplete character just before EOF */ + r = rb_enc_mbclen(fptr->cbuf+fptr->cbuf_off, + fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len, + read_enc); + io_shift_cbuf(fptr, r, &str); + str = io_enc_str(str, fptr); + ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN); + return str; } } if (MBCLEN_INVALID_P(r)) { r = rb_enc_mbclen(fptr->cbuf+fptr->cbuf_off, fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len, - fptr->encs.enc); - return io_shift_cbuf(fptr, r, &str); - } - return io_shift_cbuf(fptr, MBCLEN_CHARFOUND_LEN(r), &str); + read_enc); + io_shift_cbuf(fptr, r, &str); + cr = ENC_CODERANGE_BROKEN; + } + else { + io_shift_cbuf(fptr, MBCLEN_CHARFOUND_LEN(r), &str); + cr = ENC_CODERANGE_VALID; + } + str = io_enc_str(str, fptr); + ENC_CODERANGE_SET(str, cr); + return str; } if (io_fillbuf(fptr) < 0) { diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index deacd0b36d..04c97c61b2 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -385,6 +385,21 @@ EOT } end + def test_getc_newlineconv + with_tmpdir { + src = "\u3042" + generate_file('tmp', src) + defext = Encoding.default_external + Encoding.default_external = Encoding::UTF_8 + open("tmp", "rt") {|f| + s = f.getc + assert_equal(true, s.valid_encoding?) + assert_equal("\u3042", s) + } + Encoding.default_external = defext + } + end + def test_ungetc_stateful_conversion with_tmpdir { src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")