Fix coderange calculation in String#b

Leave the new coderange unknown if the original encoding is not
ASCII-compatible. A string in a non-ASCII-compatible encoding whose
coderange is valid or broken can consist solely of 7-bit bytes, so its
binary copy can end up ASCII-only rather than valid.

Fixes 9a8f6e392fbd ("Cheaply derive code range for String#b return
value", 2022-07-25).
This commit is contained in:
Kazuki Yamaguchi 2022-09-26 16:39:53 +09:00
parent 1c14e406d3
commit 5b0396473b

View File

@@ -10771,20 +10771,22 @@ rb_str_b(VALUE str)
} }
str_replace_shared_without_enc(str2, str); str_replace_shared_without_enc(str2, str);
// BINARY strings can never be broken; they're either 7-bit ASCII or VALID. if (rb_enc_asciicompat(STR_ENC_GET(str))) {
// If we know the receiver's code range then we know the result's code range. // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
int cr = ENC_CODERANGE(str); // If we know the receiver's code range then we know the result's code range.
switch (cr) { int cr = ENC_CODERANGE(str);
case ENC_CODERANGE_7BIT: switch (cr) {
ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT); case ENC_CODERANGE_7BIT:
break; ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
case ENC_CODERANGE_BROKEN: break;
case ENC_CODERANGE_VALID: case ENC_CODERANGE_BROKEN:
ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID); case ENC_CODERANGE_VALID:
break; ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
default: break;
ENC_CODERANGE_CLEAR(str2); default:
break; ENC_CODERANGE_CLEAR(str2);
break;
}
} }
return str2; return str2;