Fix crash when checking symbol encoding

[Bug #20245]

We sometimes pass a fake string to sym_check_asciionly. If
sym_check_asciionly raises, this can crash: raising creates a CFP with the
fake string as the receiver, and the process crashes if GC tries to mark
that CFP. To avoid this, the fake string is now copied into a real string
object before raising.

For example, the following script crashes:

    GC.stress = true
    Object.const_defined?("\xC3")
This commit is contained in:
Peter Zhu 2024-02-07 11:30:20 -05:00
parent 6756dbf3bb
commit 01fd262e62
2 changed files with 18 additions and 7 deletions

View File

@ -581,11 +581,14 @@ register_static_symid_str(ID id, VALUE str)
} }
static int static int
sym_check_asciionly(VALUE str) sym_check_asciionly(VALUE str, bool fake_str)
{ {
if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE; if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
switch (rb_enc_str_coderange(str)) { switch (rb_enc_str_coderange(str)) {
case ENC_CODERANGE_BROKEN: case ENC_CODERANGE_BROKEN:
if (fake_str) {
str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
}
rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE, rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
rb_enc_name(rb_enc_get(str)), str); rb_enc_name(rb_enc_get(str)), str);
case ENC_CODERANGE_7BIT: case ENC_CODERANGE_7BIT:
@ -778,7 +781,7 @@ intern_str(VALUE str, int mutable)
id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
if (id == (ID)-1) id = ID_JUNK; if (id == (ID)-1) id = ID_JUNK;
if (sym_check_asciionly(str)) { if (sym_check_asciionly(str, false)) {
if (!mutable) str = rb_str_dup(str); if (!mutable) str = rb_str_dup(str);
rb_enc_associate(str, rb_usascii_encoding()); rb_enc_associate(str, rb_usascii_encoding());
} }
@ -869,7 +872,7 @@ rb_str_intern(VALUE str)
else if (USE_SYMBOL_GC) { else if (USE_SYMBOL_GC) {
rb_encoding *enc = rb_enc_get(str); rb_encoding *enc = rb_enc_get(str);
rb_encoding *ascii = rb_usascii_encoding(); rb_encoding *ascii = rb_usascii_encoding();
if (enc != ascii && sym_check_asciionly(str)) { if (enc != ascii && sym_check_asciionly(str, false)) {
str = rb_str_dup(str); str = rb_str_dup(str);
rb_enc_associate(str, ascii); rb_enc_associate(str, ascii);
OBJ_FREEZE(str); OBJ_FREEZE(str);
@ -1116,7 +1119,7 @@ rb_check_id(volatile VALUE *namep)
*namep = name; *namep = name;
} }
sym_check_asciionly(name); sym_check_asciionly(name, false);
return lookup_str_id(name); return lookup_str_id(name);
} }
@ -1175,7 +1178,7 @@ rb_check_symbol(volatile VALUE *namep)
*namep = name; *namep = name;
} }
sym_check_asciionly(name); sym_check_asciionly(name, false);
if ((sym = lookup_str_sym(name)) != 0) { if ((sym = lookup_str_sym(name)) != 0) {
return sym; return sym;
@ -1190,7 +1193,7 @@ rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
struct RString fake_str; struct RString fake_str;
const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc); const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
sym_check_asciionly(name); sym_check_asciionly(name, true);
return lookup_str_id(name); return lookup_str_id(name);
} }
@ -1202,7 +1205,7 @@ rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
struct RString fake_str; struct RString fake_str;
const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc); const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
sym_check_asciionly(name); sym_check_asciionly(name, true);
if ((sym = lookup_str_sym(name)) != 0) { if ((sym = lookup_str_sym(name)) != 0) {
return sym; return sym;

View File

@ -253,6 +253,14 @@ class TestModule < Test::Unit::TestCase
assert_operator(Math, :const_defined?, "PI") assert_operator(Math, :const_defined?, "PI")
assert_not_operator(Math, :const_defined?, :IP) assert_not_operator(Math, :const_defined?, :IP)
assert_not_operator(Math, :const_defined?, "IP") assert_not_operator(Math, :const_defined?, "IP")
# Test invalid symbol name
# [Bug #20245]
EnvUtil.under_gc_stress do
assert_raise(EncodingError) do
Math.const_defined?("\xC3")
end
end
end end
def each_bad_constants(m, &b) def each_bad_constants(m, &b)