Opaque Etags for compact index requests

This changes the CompactIndexClient to store etags received from the
compact index in separate files rather than relying on the MD5 checksum
of the file as the etag.

Smooths the upgrade from MD5 etags to opaque etags by generating them when no
etag file exists. This should reduce the initial impact of changing the
caching behavior by reducing cache misses when the MD5 etag is the same.

Eventually, the MD5 behavior should be retired and the etag should be
considered completely opaque with no assumption that MD5 would match.
This commit is contained in:
Josef Šimánek 2023-06-27 02:36:59 +02:00 committed by Hiroshi SHIBATA
parent 794c879d19
commit 71a8daecd9
16 changed files with 714 additions and 157 deletions

View File

@ -5,7 +5,13 @@ require "set"
module Bundler
class CompactIndexClient
# NOTE: MD5 is here not because we expect a server to respond with it, but
# because we use it to generate the etag on first request during the upgrade
# to the compact index client that uses opaque etags saved to files.
# Remove once 2.5.0 has been out for a while.
SUPPORTED_DIGESTS = { "sha-256" => :SHA256, "md5" => :MD5 }.freeze
DEBUG_MUTEX = Thread::Mutex.new
def self.debug
return unless ENV["DEBUG_COMPACT_INDEX"]
DEBUG_MUTEX.synchronize { warn("[#{self}] #{yield}") }
@ -14,6 +20,7 @@ module Bundler
class Error < StandardError; end
require_relative "compact_index_client/cache"
require_relative "compact_index_client/cache_file"
require_relative "compact_index_client/updater"
attr_reader :directory
@ -54,13 +61,13 @@ module Bundler
# Fetch (or refresh) the cached /names endpoint and return the parsed list.
def names
  Bundler::CompactIndexClient.debug { "/names" }
  # NOTE(review): the two update calls below look like pre-/post-rename
  # variants of the same line from a rendered diff; only the three-argument
  # form matches the current update signature — confirm.
  update(@cache.names_path, "names")
  update("names", @cache.names_path, @cache.names_etag_path)
  @cache.names
end
# Fetch (or refresh) the cached /versions endpoint.
# Also memoizes the per-gem info checksums parsed from the file.
def versions
  Bundler::CompactIndexClient.debug { "/versions" }
  # NOTE(review): duplicate update calls below look like diff residue; the
  # three-argument form matches the current update signature — confirm.
  update(@cache.versions_path, "versions")
  update("versions", @cache.versions_path, @cache.versions_etag_path)
  versions, @info_checksums_by_name = @cache.versions
  versions
end
@ -76,36 +83,36 @@ module Bundler
# Ensure /versions is up to date and return the info checksums it lists.
# Memoized: the file is parsed at most once per client instance.
def update_and_parse_checksums!
  Bundler::CompactIndexClient.debug { "update_and_parse_checksums!" }
  return @info_checksums_by_name if @parsed_checksums
  # NOTE(review): duplicate update calls below look like diff residue; the
  # three-argument form matches the current update signature — confirm.
  update(@cache.versions_path, "versions")
  update("versions", @cache.versions_path, @cache.versions_etag_path)
  @info_checksums_by_name = @cache.checksums
  @parsed_checksums = true
end
private
# Fetch remote_path (at most once per endpoint per process) into local_path,
# persisting the server's etag at local_etag_path.
# NOTE(review): the two def lines and two @updater.update calls below are
# old/new variants from a rendered diff; the
# (remote_path, local_path, local_etag_path) form matches the call sites
# above — confirm against the original commit.
def update(local_path, remote_path)
def update(remote_path, local_path, local_etag_path)
  Bundler::CompactIndexClient.debug { "update(#{local_path}, #{remote_path})" }
  # Skip endpoints already fetched during this process.
  unless synchronize { @endpoints.add?(remote_path) }
    Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" }
    return
  end
  @updater.update(local_path, url(remote_path))
  @updater.update(url(remote_path), local_path, local_etag_path)
end
# Refresh the /info/<name> cache when the checksum listed in /versions
# no longer matches the local file's MD5.
def update_info(name)
  Bundler::CompactIndexClient.debug { "update_info(#{name})" }
  path = @cache.info_path(name)
  # NOTE(review): the duplicate checksum assignments and update calls below
  # are old/new variants from a rendered diff; the SharedHelpers form and
  # the three-argument update are current — confirm.
  checksum = @updater.checksum_for_file(path)
  unless existing = @info_checksums_by_name[name]
    Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since it is missing from versions" }
    return
  end
  checksum = SharedHelpers.checksum_for_file(path, :MD5)
  if checksum == existing
    Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since the versions checksum matches the local checksum" }
    return
  end
  Bundler::CompactIndexClient.debug { "updating info for #{name} since the versions checksum #{existing} != the local checksum #{checksum}" }
  update(path, "info/#{name}")
  update("info/#{name}", path, @cache.info_etag_path(name))
end
def url(path)

View File

@ -9,11 +9,8 @@ module Bundler
# Expand and remember the cache directory, creating the info and
# info-etag directories up front.
def initialize(directory)
  @directory = Pathname.new(directory).expand_path
  # NOTE(review): the explicit loop and the mkdir-helper loop below are
  # old/new variants from a rendered diff; the helper form is current.
  info_roots.each do |dir|
    SharedHelpers.filesystem_access(dir) do
      FileUtils.mkdir_p(dir)
    end
  end
  info_roots.each {|dir| mkdir(dir) }
  mkdir(info_etag_root)
end
def names
@ -24,6 +21,10 @@ module Bundler
directory.join("names")
end
# Location of the persisted etag for the /names endpoint.
def names_etag_path
  directory.join("names.etag")
end
def versions
versions_by_name = Hash.new {|hash, key| hash[key] = [] }
info_checksums_by_name = {}
@ -49,6 +50,10 @@ module Bundler
directory.join("versions")
end
# Location of the persisted etag for the /versions endpoint.
def versions_etag_path
  directory.join("versions.etag")
end
def checksums
checksums = {}
@ -76,8 +81,19 @@ module Bundler
end
end
# Path of the persisted etag for an info file, e.g. "info-etags/rack-<md5>".
# The MD5 of the name is appended — presumably to keep the filename
# filesystem-safe for unusual gem names; NOTE(review): confirm intent.
def info_etag_path(name)
  name = name.to_s
  name_digest = SharedHelpers.digest(:MD5).hexdigest(name).downcase
  info_etag_root.join("#{name}-#{name_digest}")
end
private
# Create dir (and any missing parents), routing filesystem errors through
# Bundler's friendly error wrapper.
def mkdir(dir)
  SharedHelpers.filesystem_access(dir) { FileUtils.mkdir_p(dir) }
end
def lines(path)
return [] unless path.file?
lines = SharedHelpers.filesystem_access(path, :read, &:read).split("\n")
@ -96,6 +112,10 @@ module Bundler
directory.join("info-special-characters"),
]
end
# Directory holding the persisted etags for /info/<name> endpoints.
def info_etag_root
  directory.join("info-etags")
end
end
end
end

View File

@ -0,0 +1,153 @@
# frozen_string_literal: true
require_relative "../vendored_fileutils"
require "rubygems/package"
module Bundler
  class CompactIndexClient
    # Writes cache files in a way that is robust to concurrent modifications:
    # every write targets a process-unique temp file which atomically
    # replaces the original on commit. If digests are given, the checksums
    # are verified before the original file is replaced.
    class CacheFile
      DEFAULT_FILE_MODE = 0o644
      private_constant :DEFAULT_FILE_MODE

      class Error < RuntimeError; end
      class ClosedError < Error; end

      # Raised when calculated digests do not match the expected ones.
      class DigestMismatchError < Error
        def initialize(digests, expected_digests)
          super "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}."
        end
      end

      # Initialize with a copy of the original file, then yield the instance.
      # Digests are initialized first so the copied bytes are hashed as they
      # stream into the temp file.
      def self.copy(path, &block)
        new(path) do |file|
          file.initialize_digests
          SharedHelpers.filesystem_access(path, :read) do
            path.open("rb") do |s|
              file.open {|f| IO.copy_stream(s, f) }
            end
          end
          yield file
        end
      end

      # Write data to a temp file, then replace the original file with it
      # verifying the digests if given. A nil data argument is a no-op.
      def self.write(path, data, digests = nil)
        return unless data
        new(path) do |file|
          file.digests = digests
          file.write(data)
        end
      end

      attr_reader :original_path, :path

      # original_path is never written directly; all writes go to the temp
      # file at +path+ ("<original>.<pid>.tmp") until commit moves it over.
      def initialize(original_path, &block)
        @original_path = original_path
        # Reuse the original file's permissions when it exists.
        @perm = original_path.file? ? original_path.stat.mode : DEFAULT_FILE_MODE
        @path = original_path.sub(/$/, ".#{$$}.tmp")
        return unless block_given?
        begin
          yield self
        ensure
          close
        end
      end

      # Size of the temp file (the working copy), not the original.
      def size
        path.size
      end

      # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys.
      def initialize_digests(keys = nil)
        @digests = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup
        @digests.transform_values! {|algo_class| SharedHelpers.digest(algo_class).new }
      end

      # reset the digests so they don't contain any previously read data
      def reset_digests
        @digests&.each_value(&:reset)
      end

      # set the digests that will be verified at the end
      def digests=(expected_digests)
        @expected_digests = expected_digests
        if @expected_digests.nil?
          @digests = nil
        elsif @digests
          # Keep only the digests we are expected to verify.
          @digests = @digests.slice(*@expected_digests.keys)
        else
          initialize_digests(@expected_digests.keys)
        end
      end

      # remove this method when we stop generating md5 digests for legacy etags
      def md5
        @digests && @digests["md5"]
      end

      def digests?
        @digests&.any?
      end

      # Open the temp file for writing, reusing original permissions, yielding the IO object.
      # When digests are active, the IO is wrapped so written bytes are hashed in-flight.
      def open(write_mode = "wb", perm = @perm, &block)
        raise ClosedError, "Cannot reopen closed file" if @closed
        SharedHelpers.filesystem_access(path, :write) do
          path.open(write_mode, perm) do |f|
            yield digests? ? Gem::Package::DigestIO.new(f, @digests) : f
          end
        end
      end

      # Returns false without appending when no digests since appending is too error prone to do without digests.
      def append(data)
        return false unless digests?
        open("a") {|f| f.write data }
        verify && commit
      end

      # Overwrite the temp file with data, then verify and commit.
      # Digests are reset first so only +data+ is hashed.
      def write(data)
        reset_digests
        open {|f| f.write data }
        commit!
      end

      # Commit, raising DigestMismatchError when verification fails.
      def commit!
        verify || raise(DigestMismatchError.new(@base64digests, @expected_digests))
        commit
      end

      # Verify the digests, returning true on match, false on mismatch.
      # Trivially true when there is nothing to verify. Consumes @digests.
      def verify
        return true unless @expected_digests && digests?
        @base64digests = @digests.transform_values!(&:base64digest)
        @digests = nil
        @base64digests.all? {|algo, digest| @expected_digests[algo] == digest }
      end

      # Replace the original file with the temp file without verifying digests.
      # The file is permanently closed.
      def commit
        raise ClosedError, "Cannot commit closed file" if @closed
        SharedHelpers.filesystem_access(original_path, :write) do
          FileUtils.mv(path, original_path)
        end
        @closed = true
      end

      # Remove the temp file without replacing the original file.
      # The file is permanently closed.
      def close
        return if @closed
        FileUtils.remove_file(path) if @path&.file?
        @closed = true
      end
    end
  end
end

View File

@ -1,20 +1,11 @@
# frozen_string_literal: true
require_relative "../vendored_fileutils"
module Bundler
class CompactIndexClient
class Updater
class MisMatchedChecksumError < Error
def initialize(path, server_checksum, local_checksum)
@path = path
@server_checksum = server_checksum
@local_checksum = local_checksum
end
def message
"The checksum of /#{@path} does not match the checksum provided by the server! Something is wrong " \
"(local checksum is #{@local_checksum.inspect}, was expecting #{@server_checksum.inspect})."
class MismatchedChecksumError < Error
def initialize(path, message)
super "The checksum of /#{path} does not match the checksum provided by the server! Something is wrong. #{message}"
end
end
@ -22,101 +13,103 @@ module Bundler
@fetcher = fetcher
end
def update(local_path, remote_path, retrying = nil)
headers = {}
local_temp_path = local_path.sub(/$/, ".#{$$}")
local_temp_path = local_temp_path.sub(/$/, ".retrying") if retrying
local_temp_path = local_temp_path.sub(/$/, ".tmp")
# first try to fetch any new bytes on the existing file
if retrying.nil? && local_path.file?
copy_file local_path, local_temp_path
headers["If-None-Match"] = etag_for(local_temp_path)
headers["Range"] =
if local_temp_path.size.nonzero?
# Subtract a byte to ensure the range won't be empty.
# Avoids 416 (Range Not Satisfiable) responses.
"bytes=#{local_temp_path.size - 1}-"
else
"bytes=#{local_temp_path.size}-"
end
end
response = @fetcher.call(remote_path, headers)
return nil if response.is_a?(Net::HTTPNotModified)
content = response.body
etag = (response["ETag"] || "").gsub(%r{\AW/}, "")
correct_response = SharedHelpers.filesystem_access(local_temp_path) do
if response.is_a?(Net::HTTPPartialContent) && local_temp_path.size.nonzero?
local_temp_path.open("a") {|f| f << slice_body(content, 1..-1) }
etag_for(local_temp_path) == etag
else
local_temp_path.open("wb") {|f| f << content }
etag.length.zero? || etag_for(local_temp_path) == etag
end
end
if correct_response
SharedHelpers.filesystem_access(local_path) do
FileUtils.mv(local_temp_path, local_path)
end
return nil
end
if retrying
raise MisMatchedChecksumError.new(remote_path, etag, etag_for(local_temp_path))
end
update(local_path, remote_path, :retrying)
def update(remote_path, local_path, etag_path)
append(remote_path, local_path, etag_path) || replace(remote_path, local_path, etag_path)
rescue CacheFile::DigestMismatchError => e
raise MismatchedChecksumError.new(remote_path, e.message)
rescue Zlib::GzipFile::Error
raise Bundler::HTTPError
ensure
FileUtils.remove_file(local_temp_path) if File.exist?(local_temp_path)
end
# Legacy: derive a quoted MD5 etag (e.g. %("abc…")) from the file contents.
# Returns nil when the file does not exist.
# NOTE(review): appears to be the removed (pre-opaque-etag) implementation
# left visible by the diff rendering.
def etag_for(path)
  sum = checksum_for_file(path)
  sum ? %("#{sum}") : nil
end
# Byte-oriented slice of the response body (byteslice, not character slice).
# NOTE(review): removed-side diff residue.
def slice_body(body, range)
  body.byteslice(range)
end
# Legacy: MD5 hex digest of the file at path, or nil when it does not exist.
# NOTE(review): removed-side diff residue; the generalized version now
# lives at SharedHelpers.checksum_for_file(path, digest).
def checksum_for_file(path)
  return nil unless path.file?
  # This must use File.read instead of Digest.file().hexdigest
  # because we need to preserve \n line endings on windows when calculating
  # the checksum
  SharedHelpers.filesystem_access(path, :read) do
    File.open(path, "rb") do |f|
      digest = SharedHelpers.digest(:MD5).new
      buf = String.new(:capacity => 16_384, :encoding => Encoding::BINARY)
      digest << buf while f.read(16_384, buf)
      digest.hexdigest
    end
  end
end
private
def copy_file(source, dest)
SharedHelpers.filesystem_access(source, :read) do
File.open(source, "r") do |s|
SharedHelpers.filesystem_access(dest, :write) do
File.open(dest, "wb", s.stat.mode) do |f|
IO.copy_stream(s, f)
end
end
# Attempt an incremental (Range) update of local_path.
# Returns true when the cache was brought up to date (or already current),
# false when a full replace is required: no usable local file, or the
# appended result failed its digest check.
def append(remote_path, local_path, etag_path)
  return false unless local_path.file? && local_path.size.nonzero?
  CacheFile.copy(local_path) do |file|
    # Use the saved etag, falling back to generating the legacy MD5 etag.
    # Remove the generate_etag fallback after 2.5.0 has been out for a while.
    etag = etag_for_request(etag_path) || generate_etag(etag_path, file)
    # Subtract a byte to ensure the range won't be empty.
    # Avoids 416 (Range Not Satisfiable) responses.
    response = @fetcher.call(remote_path, request_headers(etag, file.size - 1))
    break true if response.is_a?(Net::HTTPNotModified)
    file.digests = parse_digests(response)
    # server may ignore Range and return the full response
    if response.is_a?(Net::HTTPPartialContent)
      # Drop the overlapping first byte requested above.
      break false unless file.append(response.body.byteslice(1..-1))
    else
      file.write(response.body)
    end
    CacheFile.write(etag_path, etag(response))
    true
  end
end
# request without range header to get the full file or a 304 Not Modified
def replace(remote_path, local_path, etag_path)
etag = etag_path.read.tap(&:chomp!) if etag_path.file?
response = @fetcher.call(remote_path, request_headers(etag))
return true if response.is_a?(Net::HTTPNotModified)
CacheFile.write(local_path, response.body, parse_digests(response))
CacheFile.write(etag_path, etag(response))
end
# Conditional request headers: Range when range_start is given (partial
# fetch) and If-None-Match when a cached etag is known.
def request_headers(etag, range_start = nil)
  {}.tap do |headers|
    headers["Range"] = "bytes=#{range_start}-" if range_start
    headers["If-None-Match"] = etag if etag
  end
end
# Read the saved etag for a conditional request.
# Returns the chomped etag string, or nil when no etag file exists.
def etag_for_request(etag_path)
  return unless etag_path.file?
  etag_path.read.chomp
end
# When first releasing this opaque etag feature, we want to generate the old MD5 etag
# based on the content of the file. After that it will always use the saved opaque etag.
# This transparently saves existing users with good caches from updating a bunch of files.
# Remove this behavior after 2.5.0 has been out for a while.
def generate_etag(etag_path, file)
  # file is a CacheFile whose digests include md5 (see CacheFile#md5).
  etag = file.md5.hexdigest
  # Persist so later runs use the saved etag instead of regenerating.
  CacheFile.write(etag_path, etag)
  etag
end
# Extract the opaque etag value from the response.
# Strips a weak-validator prefix ("W/") and surrounding double quotes.
# Returns nil when the header is absent or an opening quote is unbalanced.
def etag(response)
  header = response["ETag"]
  return unless header
  value = header.delete_prefix("W/")
  return if value.delete_prefix!('"') && !value.delete_suffix!('"')
  value
end
# Unwraps and returns a Hash of digest algorithms and base64 values
# according to RFC 8941 Structured Field Values for HTTP.
# https://www.rfc-editor.org/rfc/rfc8941#name-parsing-a-byte-sequence
# Ignores unsupported algorithms.
# Unwraps and returns a Hash of digest algorithm names to base64 values
# according to RFC 8941 Structured Field Values for HTTP.
# https://www.rfc-editor.org/rfc/rfc8941#name-parsing-a-byte-sequence
# Unsupported algorithms and malformed byte sequences are skipped.
# Returns nil when neither digest header is present or nothing is usable.
def parse_digests(response)
  header = response["Repr-Digest"] || response["Digest"]
  return unless header
  digests = header.split(",").each_with_object({}) do |param, acc|
    algorithm, value = param.split("=", 2)
    algorithm.strip!
    algorithm.downcase!
    next unless SUPPORTED_DIGESTS.key?(algorithm)
    next unless value = byte_sequence(value)
    acc[algorithm] = value
  end
  digests.empty? ? nil : digests
end
# Unwrap surrounding colons (byte sequence)
# The wrapping characters must be matched or we return nil.
# Also handles quotes because right now rubygems.org sends them.
# Unwrap surrounding colons (RFC 8941 byte sequence), mutating value.
# Also unwraps double quotes because rubygems.org currently sends them.
# Returns nil when an opening wrapper has no matching closing wrapper.
def byte_sequence(value)
  opened_colon = value.delete_prefix!(":")
  return if opened_colon && !value.delete_suffix!(":")
  opened_quote = value.delete_prefix!('"')
  return if opened_quote && !value.delete_suffix!('"')
  value
end
end
end
end

View File

@ -13,7 +13,7 @@ module Bundler
undef_method(method_name)
define_method(method_name) do |*args, &blk|
method.bind(self).call(*args, &blk)
rescue NetworkDownError, CompactIndexClient::Updater::MisMatchedChecksumError => e
rescue NetworkDownError, CompactIndexClient::Updater::MismatchedChecksumError => e
raise HTTPError, e.message
rescue AuthenticationRequiredError, BadAuthenticationError
# Fail since we got a 401 from the server.
@ -62,7 +62,7 @@ module Bundler
end
# Read info file checksums out of /versions, so we can know if gems are up to date
compact_index_client.update_and_parse_checksums!
rescue CompactIndexClient::Updater::MisMatchedChecksumError => e
rescue CompactIndexClient::Updater::MismatchedChecksumError => e
Bundler.ui.debug(e.message)
nil
end

View File

@ -193,6 +193,21 @@ module Bundler
Digest(name)
end
# Hex digest of a file's raw bytes using the named digest algorithm.
# Returns nil when the file does not exist.
# Reads in binary mode in fixed-size chunks so "\n" line endings are
# preserved on Windows, keeping the checksum stable across platforms.
def checksum_for_file(path, digest)
  return unless path.file?
  SharedHelpers.filesystem_access(path, :read) do
    File.open(path, "rb") do |io|
      hasher = SharedHelpers.digest(digest).new
      chunk = String.new(:capacity => 16_384, :encoding => Encoding::BINARY)
      hasher.update(chunk) while io.read(16_384, chunk)
      hasher.hexdigest
    end
  end
end
def write_to_gemfile(gemfile_path, contents)
filesystem_access(gemfile_path) {|g| File.open(g, "w") {|file| file.puts contents } }
end

View File

@ -6,21 +6,202 @@ require "bundler/compact_index_client/updater"
require "tmpdir"
RSpec.describe Bundler::CompactIndexClient::Updater do
subject(:updater) { described_class.new(fetcher) }
let(:fetcher) { double(:fetcher) }
let(:local_path) { Pathname.new Dir.mktmpdir("localpath") }
let(:local_path) { Pathname.new(Dir.mktmpdir("localpath")).join("versions") }
let(:etag_path) { Pathname.new(Dir.mktmpdir("localpath-etags")).join("versions.etag") }
let(:remote_path) { double(:remote_path) }
let!(:updater) { described_class.new(fetcher) }
let(:full_body) { "abc123" }
let(:response) { double(:response, :body => full_body, :is_a? => false) }
let(:digest) { Digest::SHA256.base64digest(full_body) }
context "when the local path does not exist" do
before do
allow(response).to receive(:[]).with("Repr-Digest") { nil }
allow(response).to receive(:[]).with("Digest") { nil }
allow(response).to receive(:[]).with("ETag") { "thisisanetag" }
end
it "downloads the file without attempting append" do
expect(fetcher).to receive(:call).once.with(remote_path, {}) { response }
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq(full_body)
expect(etag_path.read).to eq("thisisanetag")
end
it "fails immediately on bad checksum" do
expect(fetcher).to receive(:call).once.with(remote_path, {}) { response }
allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:baddigest:" }
expect do
updater.update(remote_path, local_path, etag_path)
end.to raise_error(Bundler::CompactIndexClient::Updater::MismatchedChecksumError)
end
end
context "when the local path exists" do
let(:local_body) { "abc" }
before do
local_path.open("w") {|f| f.write(local_body) }
end
context "with an etag" do
before do
etag_path.open("w") {|f| f.write("LocalEtag") }
end
let(:headers) do
{
"If-None-Match" => "LocalEtag",
"Range" => "bytes=2-",
}
end
it "does nothing if etags match" do
expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response)
allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { false }
allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { true }
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq("abc")
expect(etag_path.read).to eq("LocalEtag")
end
it "appends the file if etags do not match" do
expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response)
allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" }
allow(response).to receive(:[]).with("ETag") { "NewEtag" }
allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { true }
allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { false }
allow(response).to receive(:body) { "c123" }
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq(full_body)
expect(etag_path.read).to eq("NewEtag")
end
it "replaces the file if response ignores range" do
expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response)
allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" }
allow(response).to receive(:[]).with("ETag") { "NewEtag" }
allow(response).to receive(:body) { full_body }
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq(full_body)
expect(etag_path.read).to eq("NewEtag")
end
it "tries the request again if the partial response fails digest check" do
allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:baddigest:" }
allow(response).to receive(:body) { "the beginning of the file changed" }
allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { true }
expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response)
full_response = double(:full_response, :body => full_body, :is_a? => false)
allow(full_response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" }
allow(full_response).to receive(:[]).with("ETag") { "NewEtag" }
expect(fetcher).to receive(:call).once.with(remote_path, { "If-None-Match" => "LocalEtag" }).and_return(full_response)
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq(full_body)
expect(etag_path.read).to eq("NewEtag")
end
end
context "without an etag file" do
let(:headers) do
{
"Range" => "bytes=2-",
# This MD5 feature should be deleted after sufficient time has passed since release.
# From then on, requests that still don't have a saved etag will be made without this header.
"If-None-Match" => Digest::MD5.hexdigest(local_body),
}
end
it "saves only the etag_path if generated etag matches" do
expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response)
allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { false }
allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { true }
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq("abc")
expect(etag_path.read).to eq(headers["If-None-Match"])
end
it "appends the file" do
expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response)
allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" }
allow(response).to receive(:[]).with("ETag") { "OpaqueEtag" }
allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { true }
allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { false }
allow(response).to receive(:body) { "c123" }
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq(full_body)
expect(etag_path.read).to eq("OpaqueEtag")
end
it "replaces the file on full file response that ignores range request" do
expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response)
allow(response).to receive(:[]).with("Repr-Digest") { nil }
allow(response).to receive(:[]).with("Digest") { nil }
allow(response).to receive(:[]).with("ETag") { "OpaqueEtag" }
allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { false }
allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { false }
allow(response).to receive(:body) { full_body }
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq(full_body)
expect(etag_path.read).to eq("OpaqueEtag")
end
it "tries the request again if the partial response fails digest check" do
allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:baddigest:" }
allow(response).to receive(:body) { "the beginning of the file changed" }
allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { true }
expect(fetcher).to receive(:call).once.with(remote_path, headers) do
# During the failed first request, we simulate another process writing the etag.
# This ensures the second request doesn't generate the md5 etag again but just uses whatever is written.
etag_path.open("w") {|f| f.write("LocalEtag") }
response
end
full_response = double(:full_response, :body => full_body, :is_a? => false)
allow(full_response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" }
allow(full_response).to receive(:[]).with("ETag") { "NewEtag" }
expect(fetcher).to receive(:call).once.with(remote_path, { "If-None-Match" => "LocalEtag" }).and_return(full_response)
updater.update(remote_path, local_path, etag_path)
expect(local_path.read).to eq(full_body)
expect(etag_path.read).to eq("NewEtag")
end
end
end
context "when the ETag header is missing" do
# Regression test for https://github.com/rubygems/bundler/issues/5463
let(:response) { double(:response, :body => "abc123") }
let(:response) { double(:response, :body => full_body) }
it "treats the response as an update" do
expect(response).to receive(:[]).with("ETag") { nil }
allow(response).to receive(:[]).with("Repr-Digest") { nil }
allow(response).to receive(:[]).with("Digest") { nil }
allow(response).to receive(:[]).with("ETag") { nil }
expect(fetcher).to receive(:call) { response }
updater.update(local_path, remote_path)
updater.update(remote_path, local_path, etag_path)
end
end
@ -31,7 +212,7 @@ RSpec.describe Bundler::CompactIndexClient::Updater do
expect(fetcher).to receive(:call).and_raise(Zlib::GzipFile::Error)
expect do
updater.update(local_path, remote_path)
updater.update(remote_path, local_path, etag_path)
end.to raise_error(Bundler::HTTPError)
end
end
@ -46,10 +227,12 @@ RSpec.describe Bundler::CompactIndexClient::Updater do
begin
$VERBOSE = false
Encoding.default_internal = "ASCII"
expect(response).to receive(:[]).with("ETag") { nil }
allow(response).to receive(:[]).with("Repr-Digest") { nil }
allow(response).to receive(:[]).with("Digest") { nil }
allow(response).to receive(:[]).with("ETag") { nil }
expect(fetcher).to receive(:call) { response }
updater.update(local_path, remote_path)
updater.update(remote_path, local_path, etag_path)
ensure
Encoding.default_internal = previous_internal_encoding
$VERBOSE = old_verbose

View File

@ -157,9 +157,8 @@ RSpec.describe "compact index api" do
bundle :install, :verbose => true, :artifice => "compact_index_checksum_mismatch"
expect(out).to include("Fetching gem metadata from #{source_uri}")
expect(out).to include <<-'WARN'
The checksum of /versions does not match the checksum provided by the server! Something is wrong (local checksum is "\"d41d8cd98f00b204e9800998ecf8427e\"", was expecting "\"123\"").
WARN
expect(out).to include("The checksum of /versions does not match the checksum provided by the server!")
expect(out).to include('Calculated checksums {"sha-256"=>"8KfZiM/fszVkqhP/m5s9lvE6M9xKu4I1bU4Izddp5Ms="} did not match expected {"sha-256"=>"ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0="}')
expect(the_bundle).to include_gems "rack 1.0.0"
end
@ -169,10 +168,12 @@ The checksum of /versions does not match the checksum provided by the server! So
gem "rack"
G
versions = File.join(Bundler.rubygems.user_home, ".bundle", "cache", "compact_index",
"localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions")
FileUtils.mkdir_p(File.dirname(versions))
FileUtils.touch(versions)
versions = Pathname.new(Bundler.rubygems.user_home).join(
".bundle", "cache", "compact_index",
"localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions"
)
versions.dirname.mkpath
versions.write("created_at")
FileUtils.chmod("-r", versions)
bundle :install, :artifice => "compact_index", :raise_on_error => false
@ -772,23 +773,83 @@ The checksum of /versions does not match the checksum provided by the server! So
end
end
it "performs partial update with a non-empty range" do
it "performs update with etag not-modified" do
versions_etag = Pathname.new(Bundler.rubygems.user_home).join(
".bundle", "cache", "compact_index",
"localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions.etag"
)
expect(versions_etag.file?).to eq(false)
gemfile <<-G
source "#{source_uri}"
gem 'rack', '0.9.1'
G
# Initial install creates the cached versions file
# Initial install creates the cached versions file and etag file
bundle :install, :artifice => "compact_index"
expect(versions_etag.file?).to eq(true)
previous_content = versions_etag.binread
# Update the Gemfile so we can check subsequent install was successful
gemfile <<-G
source "#{source_uri}"
gem 'rack', '1.0.0'
G
# Second install should make only a partial request to /versions
bundle :install, :artifice => "compact_index_partial_update"
# Second install should match etag
bundle :install, :artifice => "compact_index_etag_match"
expect(versions_etag.binread).to eq(previous_content)
expect(the_bundle).to include_gems "rack 1.0.0"
end
it "performs full update when range is ignored" do
gemfile <<-G
source "#{source_uri}"
gem 'rack', '0.9.1'
G
# Initial install creates the cached versions file and etag file
bundle :install, :artifice => "compact_index"
gemfile <<-G
source "#{source_uri}"
gem 'rack', '1.0.0'
G
versions = Pathname.new(Bundler.rubygems.user_home).join(
".bundle", "cache", "compact_index",
"localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions"
)
# Modify the cached file. The ranged request will be based on this but,
# in this test, the range is ignored so this gets overwritten, allowing install.
versions.write "ruining this file"
bundle :install, :artifice => "compact_index_range_ignored"
expect(the_bundle).to include_gems "rack 1.0.0"
end
it "performs partial update with a non-empty range" do
build_repo4 do
build_gem "rack", "0.9.1"
end
# Initial install creates the cached versions file
install_gemfile <<-G, :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s }
source "#{source_uri}"
gem 'rack', '0.9.1'
G
update_repo4 do
build_gem "rack", "1.0.0"
end
install_gemfile <<-G, :artifice => "compact_index_partial_update", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s }
source "#{source_uri}"
gem 'rack', '1.0.0'
G
expect(the_bundle).to include_gems "rack 1.0.0"
end
@ -799,19 +860,21 @@ The checksum of /versions does not match the checksum provided by the server! So
gem 'rack'
G
# Create an empty file to trigger a partial download
versions = File.join(Bundler.rubygems.user_home, ".bundle", "cache", "compact_index",
"localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions")
FileUtils.mkdir_p(File.dirname(versions))
FileUtils.touch(versions)
# Create a partial cache versions file
versions = Pathname.new(Bundler.rubygems.user_home).join(
".bundle", "cache", "compact_index",
"localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions"
)
versions.dirname.mkpath
versions.write("created_at")
bundle :install, :artifice => "compact_index_concurrent_download"
expect(File.read(versions)).to start_with("created_at")
expect(versions.read).to start_with("created_at")
expect(the_bundle).to include_gems "rack 1.0.0"
end
it "performs full update if server endpoints serve partial content responses but don't have incremental content and provide no Etag" do
it "performs a partial update that fails digest check, then a full update" do
build_repo4 do
build_gem "rack", "0.9.1"
end
@ -825,7 +888,29 @@ The checksum of /versions does not match the checksum provided by the server! So
build_gem "rack", "1.0.0"
end
install_gemfile <<-G, :artifice => "compact_index_partial_update_no_etag_not_incremental", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s }
install_gemfile <<-G, :artifice => "compact_index_partial_update_bad_digest", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s }
source "#{source_uri}"
gem 'rack', '1.0.0'
G
expect(the_bundle).to include_gems "rack 1.0.0"
end
it "performs full update if server endpoints serve partial content responses but don't have incremental content and provide no digest" do
build_repo4 do
build_gem "rack", "0.9.1"
end
install_gemfile <<-G, :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s }
source "#{source_uri}"
gem 'rack', '0.9.1'
G
update_repo4 do
build_gem "rack", "1.0.0"
end
install_gemfile <<-G, :artifice => "compact_index_partial_update_no_digest_not_incremental", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s }
source "#{source_uri}"
gem 'rack', '1.0.0'
G
@ -847,7 +932,7 @@ The checksum of /versions does not match the checksum provided by the server! So
expected_rack_info_content = File.read(rake_info_path)
# Modify the cache files. We expect them to be reset to the normal ones when we re-run :install
File.open(rake_info_path, "w") {|f| f << (expected_rack_info_content + "this is different") }
File.open(rake_info_path, "a") {|f| f << "this is different" }
# Update the Gemfile so the next install does its normal things
gemfile <<-G

View File

@ -4,10 +4,10 @@ require_relative "helpers/compact_index"
class CompactIndexChecksumMismatch < CompactIndexAPI
get "/versions" do
headers "ETag" => quote("123")
headers "Repr-Digest" => "sha-256=:ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=:"
headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60"
content_type "text/plain"
body ""
body "content does not match the checksum"
end
end

View File

@ -7,11 +7,12 @@ class CompactIndexConcurrentDownload < CompactIndexAPI
versions = File.join(Bundler.rubygems.user_home, ".bundle", "cache", "compact_index",
"localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions")
# Verify the original (empty) content hasn't been deleted, e.g. on a retry
File.binread(versions) == "" || raise("Original file should be present and empty")
# Verify the original content hasn't been deleted, e.g. on a retry
data = File.binread(versions)
data == "created_at" || raise("Original file should be present with expected content")
# Verify this is only requested once for a partial download
env["HTTP_RANGE"] || raise("Missing Range header for expected partial download")
env["HTTP_RANGE"] == "bytes=#{data.bytesize - 1}-" || raise("Missing Range header for expected partial download")
# Overwrite the file in parallel, which should be then overwritten
# after a successful download to prevent corruption

View File

@ -0,0 +1,16 @@
# frozen_string_literal: true
require_relative "helpers/compact_index"
# Artifice that always answers 304 Not Modified, echoing the client's own
# ETag back. Used to verify the client treats a matching etag as "cache is
# fresh" and keeps its local files untouched.
class CompactIndexEtagMatch < CompactIndexAPI
  get "/versions" do
    client_etag = env["HTTP_IF_NONE_MATCH"]
    raise "ETag header should be present" if client_etag.nil?
    headers "ETag" => client_etag
    body ""
    status 304
  end
end
require_relative "helpers/artifice"
Artifice.activate_with(CompactIndexEtagMatch)

View File

@ -23,7 +23,7 @@ class CompactIndexPartialUpdate < CompactIndexAPI
# Verify that a partial request is made, starting from the index of the
# final byte of the cached file.
unless env["HTTP_RANGE"] == "bytes=#{File.binread(cached_versions_path).bytesize - 1}-"
raise("Range header should be present, and start from the index of the final byte of the cache.")
raise("Range header should be present, and start from the index of the final byte of the cache. #{env["HTTP_RANGE"].inspect}")
end
etag_response do

View File

@ -0,0 +1,40 @@
# frozen_string_literal: true
require_relative "helpers/compact_index"
# The purpose of this Artifice is to test that an incremental response is invalidated
# and a second request is issued for the full content.
# Serves compact index responses whose Repr-Digest is deliberately wrong for
# ranged (incremental) requests, so the client must discard the partial
# content and re-request the full body (which carries a correct digest).
class CompactIndexPartialUpdateBadDigest < CompactIndexAPI
  # Wraps a response body: a bogus digest when the client sent a Range
  # header, the real SHA-256 otherwise.
  def partial_update_bad_digest
    full_body = yield
    digest_input =
      if request.env["HTTP_RANGE"]
        "wrong digest on ranged request"
      else
        full_body
      end
    headers "Repr-Digest" => "sha-256=:#{Digest::SHA256.base64digest(digest_input)}:"
    headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60"
    content_type "text/plain"
    requested_range_for(full_body)
  end

  get "/versions" do
    partial_update_bad_digest do
      path = tmp("versions.list")
      FileUtils.rm_f(path)
      versions_file = CompactIndex::VersionsFile.new(path.to_s)
      versions_file.create(gems)
      versions_file.contents([], :calculate_info_checksums => true)
    end
  end

  get "/info/:name" do
    partial_update_bad_digest do
      found = gems.find {|g| g.name == params[:name] }
      CompactIndex.info(found ? found.versions : [])
    end
  end
end
require_relative "helpers/artifice"
Artifice.activate_with(CompactIndexPartialUpdateBadDigest)

View File

@ -2,8 +2,10 @@
require_relative "helpers/compact_index"
class CompactIndexPartialUpdateNoEtagNotIncremental < CompactIndexAPI
def partial_update_no_etag
# The purpose of this Artifice is to test that an incremental response is ignored
# when the digest is not present to verify that the partial response is valid.
class CompactIndexPartialUpdateNoDigestNotIncremental < CompactIndexAPI
def partial_update_no_digest
response_body = yield
headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60"
content_type "text/plain"
@ -11,7 +13,7 @@ class CompactIndexPartialUpdateNoEtagNotIncremental < CompactIndexAPI
end
get "/versions" do
partial_update_no_etag do
partial_update_no_digest do
file = tmp("versions.list")
FileUtils.rm_f(file)
file = CompactIndex::VersionsFile.new(file.to_s)
@ -25,7 +27,7 @@ class CompactIndexPartialUpdateNoEtagNotIncremental < CompactIndexAPI
end
get "/info/:name" do
partial_update_no_etag do
partial_update_no_digest do
gem = gems.find {|g| g.name == params[:name] }
lines = CompactIndex.info(gem ? gem.versions : []).split("\n")
@ -37,4 +39,4 @@ end
require_relative "helpers/artifice"
Artifice.activate_with(CompactIndexPartialUpdateNoEtagNotIncremental)
Artifice.activate_with(CompactIndexPartialUpdateNoDigestNotIncremental)

View File

@ -0,0 +1,40 @@
# frozen_string_literal: true
require_relative "helpers/compact_index"
# Artifice that accepts the client's Range header but ignores it, always
# serving the full /versions body. Exercises the client path where a ranged
# request comes back as a full (non-206) response and the whole file must be
# rewritten instead of appended to.
class CompactIndexRangeIgnored < CompactIndexAPI
  # Stub the server to not return 304 so that we don't bypass all the logic
  def not_modified?(_checksum)
    false
  end

  get "/versions" do
    cached_versions_path = File.join(
      Bundler.rubygems.user_home, ".bundle", "cache", "compact_index",
      "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions"
    )

    # Verify a cached copy of the versions file exists
    unless File.binread(cached_versions_path).size > 0
      raise("Cached versions file should be present and have content")
    end

    # The client has a cached copy, so it should attempt an incremental fetch
    # by sending a Range header. Remove the header (ignoring the range) so the
    # full response below simulates a server without range support; the raise
    # fires if the client skipped the incremental attempt entirely.
    unless env.delete("HTTP_RANGE")
      raise("Expected client to write the full response on the first try")
    end

    etag_response do
      file = tmp("versions.list")
      FileUtils.rm_f(file)
      file = CompactIndex::VersionsFile.new(file.to_s)
      file.create(gems)
      file.contents
    end
  end
end
require_relative "helpers/artifice"
Artifice.activate_with(CompactIndexRangeIgnored)

View File

@ -4,6 +4,7 @@ require_relative "endpoint"
$LOAD_PATH.unshift Dir[Spec::Path.base_system_gem_path.join("gems/compact_index*/lib")].first.to_s
require "compact_index"
require "digest"
class CompactIndexAPI < Endpoint
helpers do
@ -17,9 +18,10 @@ class CompactIndexAPI < Endpoint
def etag_response
response_body = yield
checksum = Digest(:MD5).hexdigest(response_body)
return if not_modified?(checksum)
headers "ETag" => quote(checksum)
etag = Digest::MD5.hexdigest(response_body)
return if not_modified?(etag)
headers "ETag" => quote(etag)
headers "Repr-Digest" => "sha-256=:#{Digest::SHA256.base64digest(response_body)}:"
headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60"
content_type "text/plain"
requested_range_for(response_body)
@ -29,11 +31,11 @@ class CompactIndexAPI < Endpoint
raise
end
def not_modified?(checksum)
def not_modified?(etag)
etags = parse_etags(request.env["HTTP_IF_NONE_MATCH"])
return unless etags.include?(checksum)
headers "ETag" => quote(checksum)
return unless etags.include?(etag)
headers "ETag" => quote(etag)
status 304
body ""
end