From 1f6dd9071c7994dd639d2e1cf2fe04e944173f17 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Tue, 22 Oct 2024 19:22:37 -0700 Subject: [PATCH] Fix update_coderange for binary strings Although a binary (aka ASCII-8BIT) string will never have a broken coderange, it still has to differentiate between "valid" and "7bit". On Ruby 3.4/trunk this problem is masked because we now clear the coderange more aggressively in rb_str_resize, and we happened to always be shrinking this string, but we should not assume that. On Ruby 3.3 this created strings where `ascii_only?` was true in cases it shouldn't be as well as other problems. Fixes [Bug #20883] Co-authored-by: Daniel Colson Co-authored-by: Matthew Draper --- sprintf.c | 3 +-- test/ruby/test_sprintf.rb | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sprintf.c b/sprintf.c index f1ae282123..9290ed726c 100644 --- a/sprintf.c +++ b/sprintf.c @@ -247,8 +247,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) } #define update_coderange(partial) do { \ - if (coderange != ENC_CODERANGE_BROKEN && scanned < blen \ - && rb_enc_to_index(enc) /* != ENCINDEX_ASCII_8BIT */) { \ + if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { \ int cr = coderange; \ scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); \ ENC_CODERANGE_SET(result, \ diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb index 3bd3bc793c..1c7e89c265 100644 --- a/test/ruby/test_sprintf.rb +++ b/test/ruby/test_sprintf.rb @@ -546,4 +546,12 @@ class TestSprintf < Test::Unit::TestCase sprintf("%*s", RbConfig::LIMITS["INT_MIN"], "") end end + + def test_binary_format_coderange + 1.upto(500) do |i| + str = sprintf("%*s".b, i, "\xe2".b) + refute_predicate str, :ascii_only? + assert_equal i, str.bytesize + end + end end