[rubygems/rubygems] String search based parsing of compact index versions
This significantly reduces memory usage. https://github.com/rubygems/rubygems/commit/8a76506c90
This commit is contained in:
parent
b5c8fb9a3f
commit
bc40d06091
@ -4,6 +4,29 @@ require "pathname"
|
|||||||
require "set"
|
require "set"
|
||||||
|
|
||||||
module Bundler
|
module Bundler
|
||||||
|
# The CompactIndexClient is responsible for fetching and parsing the compact index.
|
||||||
|
#
|
||||||
|
# The compact index is a set of caching optimized files that are used to fetch gem information.
|
||||||
|
# The files are:
|
||||||
|
# - names: a list of all gem names
|
||||||
|
# - versions: a list of all gem versions
|
||||||
|
# - info/[gem]: a list of all versions of a gem
|
||||||
|
#
|
||||||
|
# The client is instantiated with:
|
||||||
|
# - `directory`: the root directory where the cache files are stored.
|
||||||
|
# - `fetcher`: (optional) an object that responds to #call(uri_path, headers) and returns an http response.
|
||||||
|
# If the `fetcher` is not provided, the client will only read cached files from disk.
|
||||||
|
#
|
||||||
|
# The client is organized into:
|
||||||
|
# - `Updater`: updates the cached files on disk using the fetcher.
|
||||||
|
# - `Cache`: calls the updater, caches files, read and return them from disk
|
||||||
|
# - `Parser`: parses the compact index file data
|
||||||
|
# - `CacheFile`: a concurrency safe file reader/writer that verifies checksums
|
||||||
|
#
|
||||||
|
# The client is intended to optimize memory usage and performance.
|
||||||
|
# It is called 100s or 1000s of times, parsing files with hundreds of thousands of lines.
|
||||||
|
# It may be called concurrently without global interpreter lock in some Rubies.
|
||||||
|
# As a result, some methods may look more complex than necessary to save memory or time.
|
||||||
class CompactIndexClient
|
class CompactIndexClient
|
||||||
# NOTE: MD5 is here not because we expect a server to respond with it, but
|
# NOTE: MD5 is here not because we expect a server to respond with it, but
|
||||||
# because we use it to generate the etag on first request during the upgrade
|
# because we use it to generate the etag on first request during the upgrade
|
||||||
|
@ -10,6 +10,8 @@ module Bundler
|
|||||||
@info_checksums = nil
|
@info_checksums = nil
|
||||||
@versions_by_name = nil
|
@versions_by_name = nil
|
||||||
@available = nil
|
@available = nil
|
||||||
|
@gem_parser = nil
|
||||||
|
@versions_data = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def names
|
def names
|
||||||
@ -38,46 +40,71 @@ module Bundler
|
|||||||
end
|
end
|
||||||
|
|
||||||
def info(name)
|
def info(name)
|
||||||
data = @compact_index.info(name, info_checksums[name])
|
data = @compact_index.info(name, info_checksum(name))
|
||||||
lines(data).map {|line| gem_parser.parse(line).unshift(name) }
|
lines(data).map {|line| gem_parser.parse(line).unshift(name) }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# parse the last, most recently updated line of the versions file to determine availability
|
||||||
def available?
|
def available?
|
||||||
return @available unless @available.nil?
|
return @available unless @available.nil?
|
||||||
@available = !info_checksums.empty?
|
return @available = false unless versions_data&.size&.nonzero?
|
||||||
|
|
||||||
|
line_end = versions_data.size - 1
|
||||||
|
return @available = false if versions_data[line_end] != "\n"
|
||||||
|
|
||||||
|
line_start = versions_data.rindex("\n", line_end - 1)
|
||||||
|
line_start ||= -1 # allow a single line versions file
|
||||||
|
|
||||||
|
@available = !split_last_word(versions_data, line_start + 1, line_end).nil?
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def info_checksums
|
# Search for a line starting with gem name, then return last space-separated word (the checksum)
|
||||||
@info_checksums ||= lines(@compact_index.versions).each_with_object({}) do |line, checksums|
|
def info_checksum(name)
|
||||||
parse_version_checksum(line, checksums)
|
return unless versions_data
|
||||||
end
|
return unless (line_start = rindex_of_gem(name))
|
||||||
end
|
return unless (line_end = versions_data.index("\n", line_start))
|
||||||
|
split_last_word(versions_data, line_start, line_end)
|
||||||
def lines(data)
|
|
||||||
return [] if data.nil? || data.empty?
|
|
||||||
lines = data.split("\n")
|
|
||||||
header = lines.index("---")
|
|
||||||
header ? lines[header + 1..-1] : lines
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def gem_parser
|
def gem_parser
|
||||||
@gem_parser ||= GemParser.new
|
@gem_parser ||= GemParser.new
|
||||||
end
|
end
|
||||||
|
|
||||||
# This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects.
|
def versions_data
|
||||||
# This method gets called at least once for every gem when parsing versions.
|
@versions_data ||= begin
|
||||||
def parse_version_checksum(line, checksums)
|
data = @compact_index.versions
|
||||||
line.freeze # allows slicing into the string to not allocate a copy of the line
|
strip_header!(data) if data
|
||||||
name_end = line.index(" ")
|
data.freeze
|
||||||
checksum_start = line.index(" ", name_end + 1) + 1
|
end
|
||||||
checksum_end = line.size - checksum_start
|
end
|
||||||
# freeze name since it is used as a hash key
|
|
||||||
# pre-freezing means a frozen copy isn't created
|
def rindex_of_gem(name)
|
||||||
name = line[0, name_end].freeze
|
if (pos = versions_data.rindex("\n#{name} "))
|
||||||
checksum = line[checksum_start, checksum_end]
|
pos + 1
|
||||||
checksums[name] = checksum
|
elsif versions_data.start_with?("#{name} ")
|
||||||
|
0
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# This is similar to `string.split(" ").last` but it avoids allocating extra objects.
|
||||||
|
def split_last_word(string, line_start, line_end)
|
||||||
|
return unless line_start < line_end && line_start >= 0
|
||||||
|
word_start = string.rindex(" ", line_end).to_i + 1
|
||||||
|
return if word_start < line_start
|
||||||
|
string[word_start, line_end - word_start]
|
||||||
|
end
|
||||||
|
|
||||||
|
def lines(string)
|
||||||
|
return [] if string.nil? || string.empty?
|
||||||
|
strip_header!(string)
|
||||||
|
string.split("\n")
|
||||||
|
end
|
||||||
|
|
||||||
|
def strip_header!(string)
|
||||||
|
header_end = string.index("---\n")
|
||||||
|
string.slice!(0, header_end + 4) if header_end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -20,7 +20,9 @@ RSpec.describe Bundler::CompactIndexClient::Parser do
|
|||||||
|
|
||||||
let(:compact_index) { TestCompactIndexClient.new(names, versions, info_data) }
|
let(:compact_index) { TestCompactIndexClient.new(names, versions, info_data) }
|
||||||
let(:names) { "a\nb\nc\n" }
|
let(:names) { "a\nb\nc\n" }
|
||||||
let(:versions) { <<~VERSIONS }
|
let(:versions) { <<~VERSIONS.dup }
|
||||||
|
created_at: 2024-05-01T00:00:04Z
|
||||||
|
---
|
||||||
a 1.0.0,1.0.1,1.1.0 aaa111
|
a 1.0.0,1.0.1,1.1.0 aaa111
|
||||||
b 2.0.0,2.0.0-java bbb222
|
b 2.0.0,2.0.0-java bbb222
|
||||||
c 3.0.0,3.0.3,3.3.3 ccc333
|
c 3.0.0,3.0.3,3.3.3 ccc333
|
||||||
@ -33,7 +35,8 @@ RSpec.describe Bundler::CompactIndexClient::Parser do
|
|||||||
"c" => { "ccc333yanked" => c_info },
|
"c" => { "ccc333yanked" => c_info },
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
let(:a_info) { <<~INFO }
|
let(:a_info) { <<~INFO.dup }
|
||||||
|
---
|
||||||
1.0.0 |checksum:aaa1,ruby:>= 3.0.0,rubygems:>= 3.2.3
|
1.0.0 |checksum:aaa1,ruby:>= 3.0.0,rubygems:>= 3.2.3
|
||||||
1.0.1 |checksum:aaa2,ruby:>= 3.0.0,rubygems:>= 3.2.3
|
1.0.1 |checksum:aaa2,ruby:>= 3.0.0,rubygems:>= 3.2.3
|
||||||
1.1.0 |checksum:aaa3,ruby:>= 3.0.0,rubygems:>= 3.2.3
|
1.1.0 |checksum:aaa3,ruby:>= 3.0.0,rubygems:>= 3.2.3
|
||||||
@ -52,10 +55,45 @@ RSpec.describe Bundler::CompactIndexClient::Parser do
|
|||||||
expect(parser).to be_available
|
expect(parser).to be_available
|
||||||
end
|
end
|
||||||
|
|
||||||
it "returns false when versions are not available" do
|
it "returns true when versions has only one gem" do
|
||||||
|
compact_index.versions = +"a 1.0.0 aaa1\n"
|
||||||
|
expect(parser).to be_available
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns true when versions has a gem and a header" do
|
||||||
|
compact_index.versions = +"---\na 1.0.0 aaa1\n"
|
||||||
|
expect(parser).to be_available
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns true when versions has a gem and a header with header data" do
|
||||||
|
compact_index.versions = +"created_at: 2024-05-01T00:00:04Z\n---\na 1.0.0 aaa1\n"
|
||||||
|
expect(parser).to be_available
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns false when versions has only the header" do
|
||||||
|
compact_index.versions = +"---\n"
|
||||||
|
expect(parser).not_to be_available
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns false when versions has only the header with header data" do
|
||||||
|
compact_index.versions = +"created_at: 2024-05-01T00:00:04Z\n---\n"
|
||||||
|
expect(parser).not_to be_available
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns false when versions index is not available" do
|
||||||
compact_index.versions = nil
|
compact_index.versions = nil
|
||||||
expect(parser).not_to be_available
|
expect(parser).not_to be_available
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "returns false when versions is empty" do
|
||||||
|
compact_index.versions = +""
|
||||||
|
expect(parser).not_to be_available
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns false when versions ends improperly without a newline" do
|
||||||
|
compact_index.versions = "a 1.0.0 aaa1"
|
||||||
|
expect(parser).not_to be_available
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "#names" do
|
describe "#names" do
|
||||||
|
Loading…
x
Reference in New Issue
Block a user