[rubygems/rubygems] Add TarReader::Entry#seek to seek within the tar file entry

TarReader#each previously implemented a partial version of seek.
This code moved to Entry#seek for use from TarReader#each.

Entry#close now returns nil instead of true, like IO#close.

Closing an Entry now seeks to the end of the Entry, seeking past
any remaining zero byte tar file padding and moving the io to the
correcty position to read the next file in the archive.

Uses seek for Entry#rewind and #pos=, fixing the tar->gzip->tar nested
rewind that would break previous to this change.

Add Entry.open that behaves more like File.open.

https://github.com/rubygems/rubygems/commit/f5149565d5
This commit is contained in:
Martin Emde 2023-02-16 18:03:41 -08:00 committed by git
parent 719a7726d1
commit 85a1738ab3
6 changed files with 281 additions and 86 deletions

View File

@ -53,39 +53,11 @@ class Gem::Package::TarReader
def each def each
return enum_for __method__ unless block_given? return enum_for __method__ unless block_given?
use_seek = @io.respond_to?(:seek)
until @io.eof? do until @io.eof? do
header = Gem::Package::TarHeader.from @io header = Gem::Package::TarHeader.from @io
return if header.empty? return if header.empty?
entry = Gem::Package::TarReader::Entry.new header, @io entry = Gem::Package::TarReader::Entry.new header, @io
size = entry.header.size
yield entry yield entry
skip = (512 - (size % 512)) % 512
pending = size - entry.bytes_read
if use_seek
begin
# avoid reading if the @io supports seeking
@io.seek pending, IO::SEEK_CUR
pending = 0
rescue Errno::EINVAL
end
end
# if seeking isn't supported or failed
while pending > 0 do
bytes_read = @io.read([pending, 4096].min).size
raise UnexpectedEOF if @io.eof?
pending -= bytes_read
end
@io.read skip # discard trailing zeros
# make sure nobody can use #read, #getc or #rewind anymore
entry.close entry.close
end end
end end

View File

@ -8,6 +8,20 @@
# Class for reading entries out of a tar file # Class for reading entries out of a tar file
class Gem::Package::TarReader::Entry class Gem::Package::TarReader::Entry
##
# Creates a new tar entry for +header+ that will be read from +io+
# If a block is given, the entry is yielded and then closed.
def self.open(header, io, &block)
entry = new header, io
return entry unless block_given?
begin
yield entry
ensure
entry.close
end
end
## ##
# Header for this tar entry # Header for this tar entry
@ -21,6 +35,7 @@ class Gem::Package::TarReader::Entry
@header = header @header = header
@io = io @io = io
@orig_pos = @io.pos @orig_pos = @io.pos
@end_pos = @orig_pos + @header.size
@read = 0 @read = 0
end end
@ -39,7 +54,14 @@ class Gem::Package::TarReader::Entry
# Closes the tar entry # Closes the tar entry
def close def close
return if closed?
# Seek to the end of the entry if it wasn't fully read
seek(0, IO::SEEK_END)
# discard trailing zeros
skip = (512 - (@header.size % 512)) % 512
@io.read(skip)
@closed = true @closed = true
nil
end end
## ##
@ -117,6 +139,14 @@ class Gem::Package::TarReader::Entry
bytes_read bytes_read
end end
##
# Seek to the position in the tar entry
def pos=(new_pos)
seek(new_pos, IO::SEEK_SET)
new_pos
end
def size def size
@header.size @header.size
end end
@ -157,13 +187,62 @@ class Gem::Package::TarReader::Entry
outbuf outbuf
end end
##
# Seeks to +offset+ bytes into the tar file entry
# +whence+ can be IO::SEEK_SET, IO::SEEK_CUR, or IO::SEEK_END
def seek(offset, whence = IO::SEEK_SET)
check_closed
new_pos =
case whence
when IO::SEEK_SET then @orig_pos + offset
when IO::SEEK_CUR then @io.pos + offset
when IO::SEEK_END then @end_pos + offset
else
raise ArgumentError, "invalid whence"
end
if new_pos < @orig_pos
new_pos = @orig_pos
elsif new_pos > @end_pos
new_pos = @end_pos
end
pending = new_pos - @io.pos
if @io.respond_to?(:seek)
begin
# avoid reading if the @io supports seeking
@io.seek new_pos, IO::SEEK_SET
pending = 0
rescue Errno::EINVAL
end
end
# if seeking isn't supported or failed
# negative seek requires that we rewind and read
if pending < 0
@io.rewind
pending = new_pos
end
while pending > 0 do
size_read = @io.read([pending, 4096].min).size
raise UnexpectedEOF if @io.eof?
pending -= size_read
end
@read = @io.pos - @orig_pos
0
end
## ##
# Rewinds to the beginning of the tar file entry # Rewinds to the beginning of the tar file entry
def rewind def rewind
check_closed check_closed
seek(0, IO::SEEK_SET)
@io.pos = @orig_pos
@read = 0
end end
end end

View File

@ -94,27 +94,32 @@ class Gem::Package::TarTestCase < Gem::TestCase
] ]
h = arr.join h = arr.join
ret = h + "\0" * (512 - h.size) ret = ASCIIZ(h, 512)
assert_equal(512, ret.size) assert_equal(512, ret.size)
ret ret
end end
def tar_dir_header(name, prefix, mode, mtime) def header_with_checksum(type, fname, dname, length, mode, mtime, linkname = "")
h = header("5", name, prefix, 0, mode, mtime) h = header(type, fname, dname, length, mode, mtime, nil, linkname)
checksum = calc_checksum(h) checksum = calc_checksum(h)
header("5", name, prefix, 0, mode, mtime, checksum) header(type, fname, dname, length, mode, mtime, checksum, linkname)
end
def tar_dir_header(name, prefix, mode, mtime)
header_with_checksum("5", name, prefix, 0, mode, mtime)
end end
def tar_file_header(fname, dname, mode, length, mtime) def tar_file_header(fname, dname, mode, length, mtime)
h = header("0", fname, dname, length, mode, mtime) header_with_checksum("0", fname, dname, length, mode, mtime)
checksum = calc_checksum(h)
header("0", fname, dname, length, mode, mtime, checksum)
end end
def tar_symlink_header(fname, prefix, mode, mtime, linkname) def tar_symlink_header(fname, dname, mode, mtime, linkname)
h = header("2", fname, prefix, 0, mode, mtime, nil, linkname) header_with_checksum("2", fname, dname, 0, mode, mtime, linkname)
checksum = calc_checksum(h) end
header("2", fname, prefix, 0, mode, mtime, checksum, linkname)
def tar_file_contents(content)
pad = (512 - (content.size % 512)) % 512
content + "\0" * pad
end end
def to_oct(n, pad_size) def to_oct(n, pad_size)
@ -122,11 +127,15 @@ class Gem::Package::TarTestCase < Gem::TestCase
end end
def util_entry(tar) def util_entry(tar)
io = TempIO.new tar io = tar.respond_to?(:read) ? tar : TempIO.new(tar)
header = Gem::Package::TarHeader.from io header = Gem::Package::TarHeader.from io
Gem::Package::TarReader::Entry.new header, io Gem::Package::TarReader::Entry.open header, io
end
def close_util_entry(entry)
entry.instance_variable_get(:@io).close!
end end
def util_dir_entry def util_dir_entry
@ -136,4 +145,30 @@ class Gem::Package::TarTestCase < Gem::TestCase
def util_symlink_entry def util_symlink_entry
util_entry tar_symlink_header("foo", "bar", 0, Time.now, "link") util_entry tar_symlink_header("foo", "bar", 0, Time.now, "link")
end end
def util_tar(&block)
tar_io = StringIO.new
Gem::Package::TarWriter.new(tar_io, &block)
tar_io.rewind
tar_io
end
def util_tar_gz(&block)
tar_io = util_tar(&block)
StringIO.new util_gzip(tar_io.string)
end
def util_gem_data_tar(spec = nil, &block)
data_tgz = util_tar_gz(&block)
util_tar do |tar|
if spec
tar.add_file "metadata.gz", 0444 do |io|
io.write util_gzip(spec.to_yaml)
end
end
tar.add_file "data.tar.gz", 0644 do |io|
io.write data_tgz.string
end
end
end
end end

View File

@ -1187,29 +1187,4 @@ class TestGemPackage < Gem::Package::TarTestCase
assert_equal %w[lib/code.rb], package.contents assert_equal %w[lib/code.rb], package.contents
end end
def util_tar
tar_io = StringIO.new
Gem::Package::TarWriter.new tar_io do |tar|
yield tar
end
tar_io.rewind
tar_io
end
def util_tar_gz(&block)
tar_io = util_tar(&block)
tgz_io = StringIO.new
# can't wrap TarWriter because it seeks
Zlib::GzipWriter.wrap tgz_io do |io|
io.write tar_io.string
end
StringIO.new tgz_io.string
end
end end

View File

@ -56,12 +56,14 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase
io = TempIO.new tar io = TempIO.new tar
Gem::Package::TarReader.new io do |tar_reader| Gem::Package::TarReader.new io do |tar_reader|
tar_reader.seek "baz/bar" do |entry| retval = tar_reader.seek "baz/bar" do |entry|
assert_kind_of Gem::Package::TarReader::Entry, entry assert_kind_of Gem::Package::TarReader::Entry, entry
assert_equal "baz/bar", entry.full_name assert_equal "baz/bar", entry.full_name
entry.read
end end
assert_equal "", retval
assert_equal 0, io.pos assert_equal 0, io.pos
end end
ensure ensure
@ -84,4 +86,49 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase
ensure ensure
io.close! io.close!
end end
def test_read_in_gem_data
gem_tar = util_gem_data_tar do |tar|
tar.add_file "lib/code.rb", 0444 do |io|
io.write "# lib/code.rb"
end
end
count = 0
Gem::Package::TarReader.new(gem_tar).each do |entry|
next unless entry.full_name == "data.tar.gz"
Zlib::GzipReader.wrap entry do |gzio|
Gem::Package::TarReader.new(gzio).each do |contents_entry|
assert_equal "# lib/code.rb", contents_entry.read
count += 1
end
end
end
assert_equal 1, count, "should have found one file"
end
def test_seek_in_gem_data
gem_tar = util_gem_data_tar do |tar|
tar.add_file "lib/code.rb", 0444 do |io|
io.write "# lib/code.rb"
end
tar.add_file "lib/foo.rb", 0444 do |io|
io.write "# lib/foo.rb"
end
end
count = 0
Gem::Package::TarReader.new(gem_tar).seek("data.tar.gz") do |entry|
Zlib::GzipReader.wrap entry do |gzio|
Gem::Package::TarReader.new(gzio).seek("lib/foo.rb") do |contents_entry|
assert_equal "# lib/foo.rb", contents_entry.read
count += 1
end
end
end
assert_equal 1, count, "should have found one file"
end
end end

View File

@ -10,8 +10,7 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
@tar = String.new @tar = String.new
@tar << tar_file_header("lib/foo", "", 0, @contents.size, Time.now) @tar << tar_file_header("lib/foo", "", 0, @contents.size, Time.now)
@tar << @contents @tar << tar_file_contents(@contents)
@tar << "\0" * (512 - (@tar.size % 512))
@entry = util_entry @tar @entry = util_entry @tar
end end
@ -21,8 +20,35 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
super super
end end
def close_util_entry(entry) def test_open
entry.instance_variable_get(:@io).close! io = TempIO.new @tar
header = Gem::Package::TarHeader.from io
retval = Gem::Package::TarReader::Entry.open header, io do |entry|
entry.getc
end
assert_equal "a", retval
assert_equal @tar.size, io.pos, "should have read to end of entry"
end
def test_open_closes_entry
io = TempIO.new @tar
header = Gem::Package::TarHeader.from io
entry = nil
Gem::Package::TarReader::Entry.open header, io do |e|
entry = e
end
assert entry.closed?
assert_raise(IOError) { entry.getc }
end
def test_open_returns_entry
io = TempIO.new @tar
header = Gem::Package::TarHeader.from io
entry = Gem::Package::TarReader::Entry.open header, io
refute entry.closed?
assert_equal ?a, entry.getc
assert_nil entry.close
assert entry.closed?
end end
def test_bytes_read def test_bytes_read
@ -177,6 +203,21 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
assert_equal char, @entry.getc assert_equal char, @entry.getc
end end
def test_seek
@entry.seek(50)
assert_equal 50, @entry.pos
assert_equal @contents[50..-1], @entry.read, "read remaining after seek"
@entry.seek(-50, IO::SEEK_CUR)
assert_equal @contents.size - 50, @entry.pos
assert_equal @contents[-50..-1], @entry.read, "read after stepping back 50 from the end"
@entry.seek(0, IO::SEEK_SET)
assert_equal 0, @entry.pos
assert_equal @contents, @entry.read, "read from beginning"
@entry.seek(-10, IO::SEEK_END)
assert_equal @contents.size - 10, @entry.pos
assert_equal @contents[-10..-1], @entry.read, "read from end"
end
def test_read_zero def test_read_zero
expected = StringIO.new("") expected = StringIO.new("")
assert_equal expected.read(0), @entry.read(0) assert_equal expected.read(0), @entry.read(0)
@ -187,28 +228,74 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
assert_equal expected.readpartial(0), @entry.readpartial(0) assert_equal expected.readpartial(0), @entry.readpartial(0)
end end
def util_zero_byte_entry
tar = String.new
tar << tar_file_header("lib/empty", "", 0, 0, Time.now)
tar << "\0" * (512 - (tar.size % 512))
util_entry tar
end
def test_zero_byte_file_read def test_zero_byte_file_read
zero_entry = util_zero_byte_entry zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now))
expected = StringIO.new("") expected = StringIO.new("")
assert_equal expected.read, zero_entry.read assert_equal expected.read, zero_entry.read
ensure ensure
close_util_entry(zero_entry) if zero_entry close_util_entry(zero_entry) if zero_entry
end end
def test_zero_byte_file_readpartial def test_zero_byte_file_readpartial
zero_entry = util_zero_byte_entry zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now))
expected = StringIO.new("") expected = StringIO.new("")
assert_equal expected.readpartial(0), zero_entry.readpartial(0) assert_equal expected.readpartial(0), zero_entry.readpartial(0)
ensure ensure
close_util_entry(zero_entry) if zero_entry close_util_entry(zero_entry) if zero_entry
end end
def test_read_from_gzip_io
tgz = util_gzip(@tar)
Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
entry = util_entry(gzio)
assert_equal @contents, entry.read
entry.rewind
assert_equal @contents, entry.read, "second read after rewind should read same contents"
end
end
def test_read_from_gzip_io_with_non_zero_offset
contents2 = ("0".."9").to_a.join * 100
@tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now)
@tar << tar_file_contents(contents2)
tgz = util_gzip(@tar)
Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
util_entry(gzio).close # skip the first entry so io.pos is not 0, preventing easy rewind
entry = util_entry(gzio)
assert_equal contents2, entry.read
entry.rewind
assert_equal contents2, entry.read, "second read after rewind should read same contents"
end
end
def test_seek_in_gzip_io_with_non_zero_offset
contents2 = ("0".."9").to_a.join * 100
@tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now)
@tar << tar_file_contents(contents2)
tgz = util_gzip(@tar)
Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
util_entry(gzio).close # skip the first entry so io.pos is not 0
entry = util_entry(gzio)
entry.seek(50)
assert_equal 50, entry.pos
assert_equal contents2[50..-1], entry.read, "read remaining after seek"
entry.seek(-50, IO::SEEK_CUR)
assert_equal contents2.size - 50, entry.pos
assert_equal contents2[-50..-1], entry.read, "read after stepping back 50 from the end"
entry.seek(0, IO::SEEK_SET)
assert_equal 0, entry.pos
assert_equal contents2, entry.read, "read from beginning"
entry.seek(-10, IO::SEEK_END)
assert_equal contents2.size - 10, entry.pos
assert_equal contents2[-10..-1], entry.read, "read from end"
assert_equal contents2.size, entry.pos
end
end
end end