[ruby/zlib] Zlib.gunzip should not fail with utf-8 strings

(https://github.com/ruby/zlib/pull/55)

zstream_discard_input was encoding- and character-aware when the given input was user-provided, so it discarded `len` chars instead of `len` bytes.

Also, the rdoc of Zlib.gunzip explains that it is equivalent to the following code, but that code doesn't fail for a UTF-8 String, whereas Zlib.gunzip did.

```ruby
string = %w[1f8b0800c28000000003cb48cdc9c9070086a6103605000000].pack("H*").force_encoding('UTF-8')
sio = StringIO.new(string)
gz = Zlib::GzipReader.new(sio)
p gz.read #=> "hello"
gz&.close
p Zlib.gunzip(string) #=> Zlib::DataError
```

Reported and discovered by eagletmt at https://twitter.com/eagletmt/status/1689692467929694209

https://github.com/ruby/zlib/commit/c5e58bc62a
This commit is contained in:
Sorah Fukumori 2023-08-11 05:12:06 +09:00 committed by git
parent b5b34c1f84
commit d2864ca330
2 changed files with 8 additions and 1 deletions

View File

@ -923,7 +923,7 @@ zstream_discard_input(struct zstream *z, long len)
z->input = Qnil; z->input = Qnil;
} }
else { else {
z->input = rb_str_substr(z->input, len, z->input = rb_str_subseq(z->input, len,
RSTRING_LEN(z->input) - len); RSTRING_LEN(z->input) - len);
} }
} }

View File

@ -1457,6 +1457,13 @@ if defined? Zlib
assert_raise(Zlib::GzipFile::Error){ Zlib.gunzip(src) } assert_raise(Zlib::GzipFile::Error){ Zlib.gunzip(src) }
end end
# Zlib.gunzip input is always considered a binary string, regardless of its String#encoding.
def test_gunzip_encoding
  # The 4-byte mtime field of this gzip header is c2800000; its leading bytes
  # c2 80 are also the valid UTF-8 encoding of U+0080, so the input passes as
  # a well-formed UTF-8 string while still being a binary gzip stream.
  gzipped_hex = '1f8b0800c28000000003cb48cdc9c9070086a6103605000000'
  src = [gzipped_hex].pack('H*').force_encoding(Encoding::UTF_8).freeze
  assert_equal('hello', Zlib.gunzip(src))
end
def test_gunzip_no_memory_leak def test_gunzip_no_memory_leak
assert_no_memory_leak(%[-rzlib], "#{<<~"{#"}", "#{<<~'};'}") assert_no_memory_leak(%[-rzlib], "#{<<~"{#"}", "#{<<~'};'}")
d = Zlib.gzip("data") d = Zlib.gzip("data")