[Bug #20869] Discard read buffer and encoding converters at seeking

This commit is contained in:
Nobuyoshi Nakada 2024-11-07 21:03:37 +09:00
parent 1701ddea73
commit ee29aade1a
No known key found for this signature in database
GPG Key ID: 3582D74E1FEE4465
Notes: git 2024-11-07 12:38:43 +00:00
2 changed files with 49 additions and 18 deletions

41
io.c
View File

@ -541,7 +541,8 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd)
#endif #endif
static int io_fflush(rb_io_t *); static int io_fflush(rb_io_t *);
static rb_io_t *flush_before_seek(rb_io_t *fptr); static rb_io_t *flush_before_seek(rb_io_t *fptr, bool discard_rbuf);
static void clear_codeconv(rb_io_t *fptr);
#define FMODE_SIGNAL_ON_EPIPE (1<<17) #define FMODE_SIGNAL_ON_EPIPE (1<<17)
@ -626,7 +627,7 @@ rb_sys_fail_on_write(rb_io_t *fptr)
* IO unread with taking care of removed '\r' in text mode. * IO unread with taking care of removed '\r' in text mode.
*/ */
static void static void
io_unread(rb_io_t *fptr) io_unread(rb_io_t *fptr, bool discard_rbuf)
{ {
rb_off_t r, pos; rb_off_t r, pos;
ssize_t read_size; ssize_t read_size;
@ -647,19 +648,17 @@ io_unread(rb_io_t *fptr)
if (r < 0 && errno) { if (r < 0 && errno) {
if (errno == ESPIPE) if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX; fptr->mode |= FMODE_DUPLEX;
return; if (!discard_rbuf) return;
} }
fptr->rbuf.off = 0; goto end;
fptr->rbuf.len = 0;
return;
} }
pos = lseek(fptr->fd, 0, SEEK_CUR); pos = lseek(fptr->fd, 0, SEEK_CUR);
if (pos < 0 && errno) { if (pos < 0 && errno) {
if (errno == ESPIPE) if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX; fptr->mode |= FMODE_DUPLEX;
return; if (!discard_rbuf) goto end;
} }
/* add extra offset for removed '\r' in rbuf */ /* add extra offset for removed '\r' in rbuf */
@ -700,8 +699,10 @@ io_unread(rb_io_t *fptr)
} }
} }
free(buf); free(buf);
end:
fptr->rbuf.off = 0; fptr->rbuf.off = 0;
fptr->rbuf.len = 0; fptr->rbuf.len = 0;
clear_codeconv(fptr);
return; return;
} }
@ -720,7 +721,7 @@ set_binary_mode_with_seek_cur(rb_io_t *fptr)
if (fptr->rbuf.len == 0 || fptr->mode & FMODE_DUPLEX) { if (fptr->rbuf.len == 0 || fptr->mode & FMODE_DUPLEX) {
return setmode(fptr->fd, O_BINARY); return setmode(fptr->fd, O_BINARY);
} }
flush_before_seek(fptr); flush_before_seek(fptr, false);
return setmode(fptr->fd, O_BINARY); return setmode(fptr->fd, O_BINARY);
} }
#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) set_binary_mode_with_seek_cur(fptr) #define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) set_binary_mode_with_seek_cur(fptr)
@ -916,7 +917,7 @@ rb_io_s_try_convert(VALUE dummy, VALUE io)
#if !RUBY_CRLF_ENVIRONMENT #if !RUBY_CRLF_ENVIRONMENT
static void static void
io_unread(rb_io_t *fptr) io_unread(rb_io_t *fptr, bool discard_rbuf)
{ {
rb_off_t r; rb_off_t r;
rb_io_check_closed(fptr); rb_io_check_closed(fptr);
@ -928,10 +929,11 @@ io_unread(rb_io_t *fptr)
if (r < 0 && errno) { if (r < 0 && errno) {
if (errno == ESPIPE) if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX; fptr->mode |= FMODE_DUPLEX;
return; if (!discard_rbuf) return;
} }
fptr->rbuf.off = 0; fptr->rbuf.off = 0;
fptr->rbuf.len = 0; fptr->rbuf.len = 0;
clear_codeconv(fptr);
return; return;
} }
#endif #endif
@ -972,17 +974,17 @@ io_ungetbyte(VALUE str, rb_io_t *fptr)
} }
static rb_io_t * static rb_io_t *
flush_before_seek(rb_io_t *fptr) flush_before_seek(rb_io_t *fptr, bool discard_rbuf)
{ {
if (io_fflush(fptr) < 0) if (io_fflush(fptr) < 0)
rb_sys_fail_on_write(fptr); rb_sys_fail_on_write(fptr);
io_unread(fptr); io_unread(fptr, discard_rbuf);
errno = 0; errno = 0;
return fptr; return fptr;
} }
#define io_seek(fptr, ofs, whence) (errno = 0, lseek(flush_before_seek(fptr)->fd, (ofs), (whence))) #define io_seek(fptr, ofs, whence) (errno = 0, lseek(flush_before_seek(fptr, true)->fd, (ofs), (whence)))
#define io_tell(fptr) lseek(flush_before_seek(fptr)->fd, 0, SEEK_CUR) #define io_tell(fptr) lseek(flush_before_seek(fptr, false)->fd, 0, SEEK_CUR)
#ifndef SEEK_CUR #ifndef SEEK_CUR
# define SEEK_SET 0 # define SEEK_SET 0
@ -1050,7 +1052,7 @@ rb_io_check_writable(rb_io_t *fptr)
rb_raise(rb_eIOError, "not opened for writing"); rb_raise(rb_eIOError, "not opened for writing");
} }
if (fptr->rbuf.len) { if (fptr->rbuf.len) {
io_unread(fptr); io_unread(fptr, true);
} }
} }
@ -2376,7 +2378,7 @@ rb_io_flush_raw(VALUE io, int sync)
rb_sys_fail_on_write(fptr); rb_sys_fail_on_write(fptr);
} }
if (fptr->mode & FMODE_READABLE) { if (fptr->mode & FMODE_READABLE) {
io_unread(fptr); io_unread(fptr, true);
} }
return io; return io;
@ -5471,7 +5473,6 @@ maygvl_fclose(FILE *file, int keepgvl)
} }
static void free_io_buffer(rb_io_buffer_t *buf); static void free_io_buffer(rb_io_buffer_t *buf);
static void clear_codeconv(rb_io_t *fptr);
static void static void
fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl, fptr_finalize_flush(rb_io_t *fptr, int noraise, int keepgvl,
@ -8314,7 +8315,7 @@ io_reopen(VALUE io, VALUE nfile)
rb_sys_fail_on_write(fptr); rb_sys_fail_on_write(fptr);
} }
else { else {
flush_before_seek(fptr); flush_before_seek(fptr, true);
} }
if (orig->mode & FMODE_READABLE) { if (orig->mode & FMODE_READABLE) {
pos = io_tell(orig); pos = io_tell(orig);
@ -15069,6 +15070,9 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* A new stream has position zero (and line number zero); * A new stream has position zero (and line number zero);
* method +rewind+ resets the position (and line number) to zero. * method +rewind+ resets the position (and line number) to zero.
* *
* These methods discard the stream's {buffers}[rdoc-ref:IO@Buffering] and
* the Encoding::Converter instances used for the stream.
*
* The relevant methods: * The relevant methods:
* *
* - IO#tell (aliased as +#pos+): Returns the current position (in bytes) in the stream. * - IO#tell (aliased as +#pos+): Returns the current position (in bytes) in the stream.
@ -15375,6 +15379,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y)
* - IO#putc: Writes a character to the stream. * - IO#putc: Writes a character to the stream.
* - IO#each_char: Reads each remaining character in the stream, * - IO#each_char: Reads each remaining character in the stream,
* passing the character to the given block. * passing the character to the given block.
*
* == Byte \IO * == Byte \IO
* *
* You can process an \IO stream byte-by-byte using these methods: * You can process an \IO stream byte-by-byte using these methods:

View File

@ -350,6 +350,19 @@ class TestIO < Test::Unit::TestCase
end) end)
end end
def test_ungetc_with_seek
  # [Bug #20869] A character pushed back with ungetc must not survive an
  # explicit seek: the read that follows seek(2, :SET) has to return the
  # character stored at offset 2 of the written data.
  make_tempfile do |file|
    file.open
    file.write('0123456789')
    file.rewind
    file.ungetc('a')
    file.seek(2, :SET)
    char_after_seek = file.getc
    assert_equal('2', char_after_seek)
  end
end
def test_ungetbyte def test_ungetbyte
make_tempfile {|t| make_tempfile {|t|
t.open t.open
@ -373,6 +386,19 @@ class TestIO < Test::Unit::TestCase
} }
end end
def test_ungetbyte_with_seek
  # [Bug #20869] A byte pushed back with ungetbyte must be dropped by an
  # explicit seek: getbyte after seek(2, :SET) has to return the byte
  # stored at offset 2 of the written data.
  make_tempfile do |file|
    file.open
    file.write('0123456789')
    file.rewind
    file.ungetbyte('a'.ord)
    file.seek(2, :SET)
    byte_after_seek = file.getbyte
    assert_equal('2'.ord, byte_after_seek)
  end
end
def test_each_byte def test_each_byte
pipe(proc do |w| pipe(proc do |w|
w << "abc def" w << "abc def"