diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c
index b570acbd95..18e052f210 100644
--- a/ext/objspace/objspace_dump.c
+++ b/ext/objspace/objspace_dump.c
@@ -313,6 +313,22 @@ reachable_object_i(VALUE ref, void *data)
     dc->cur_obj_references++;
 }
 
+/*
+ * Returns true when none of the first `size` bytes of `str` has its high
+ * bit set, i.e. the buffer is pure 7-bit ASCII. Only reads the buffer, so
+ * it can classify a string without caching a coderange on the object.
+ */
+static bool
+dump_string_ascii_only(const char *str, long size)
+{
+    for (long i = 0; i < size; i++) {
+        if (str[i] & 0x80) {
+            return false;
+        }
+    }
+    return true;
+}
+
 static void
 dump_append_string_content(struct dump_config *dc, VALUE obj)
 {
@@ -323,9 +339,21 @@ dump_append_string_content(struct dump_config *dc, VALUE obj)
         dump_append_sizet(dc, rb_str_capacity(obj));
     }
 
-    if (is_ascii_string(obj)) {
-        dump_append(dc, ", \"value\":");
-        dump_append_string_value(dc, obj);
+    /*
+     * Scan the bytes directly when the coderange is not cached yet, so the
+     * dumped string's flags are left untouched. Empty strings emit no "value".
+     */
+    if (RSTRING_LEN(obj) && rb_enc_asciicompat(rb_enc_from_index(ENCODING_GET(obj)))) {
+        int cr = ENC_CODERANGE(obj);
+        if (cr == RUBY_ENC_CODERANGE_UNKNOWN) {
+            if (dump_string_ascii_only(RSTRING_PTR(obj), RSTRING_LEN(obj))) {
+                cr = RUBY_ENC_CODERANGE_7BIT;
+            }
+        }
+        if (cr == RUBY_ENC_CODERANGE_7BIT) {
+            dump_append(dc, ", \"value\":");
+            dump_append_string_value(dc, obj);
+        }
     }
 }
 
@@ -389,8 +417,6 @@ dump_object(VALUE obj, struct dump_config *dc)
       case T_STRING:
         if (STR_EMBED_P(obj))
             dump_append(dc, ", \"embedded\":true");
-        if (is_broken_string(obj))
-            dump_append(dc, ", \"broken\":true");
         if (FL_TEST(obj, RSTRING_FSTR))
             dump_append(dc, ", \"fstring\":true");
         if (STR_SHARED_P(obj))
@@ -403,6 +429,27 @@ dump_object(VALUE obj, struct dump_config *dc)
             dump_append(dc, rb_enc_name(rb_enc_from_index(ENCODING_GET(obj))));
             dump_append(dc, "\"");
         }
+
+        dump_append(dc, ", \"coderange\":\"");
+        switch (RB_ENC_CODERANGE(obj)) {
+          case RUBY_ENC_CODERANGE_UNKNOWN:
+            dump_append(dc, "unknown");
+            break;
+          case RUBY_ENC_CODERANGE_7BIT:
+            dump_append(dc, "7bit");
+            break;
+          case RUBY_ENC_CODERANGE_VALID:
+            dump_append(dc, "valid");
+            break;
+          case RUBY_ENC_CODERANGE_BROKEN:
+            dump_append(dc, "broken");
+            break;
+        }
+        dump_append(dc, "\"");
+
+        if (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_BROKEN)
+            dump_append(dc, ", \"broken\":true");
+
         break;
 
       case T_HASH:
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index ed9c998597..1392447e4f 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -486,6 +486,16 @@ class TestObjSpace < Test::Unit::TestCase
     end
   end
 
+  def test_dump_string_coderange
+    assert_includes ObjectSpace.dump("TEST STRING"), '"coderange":"7bit"'
+    unknown = "TEST STRING".dup.force_encoding(Encoding::BINARY)
+    2.times do # ensure that dumping the string doesn't mutate it
+      assert_includes ObjectSpace.dump(unknown), '"coderange":"unknown"'
+    end
+    assert_includes ObjectSpace.dump("Fée"), '"coderange":"valid"'
+    assert_includes ObjectSpace.dump("\xFF"), '"coderange":"broken"'
+  end
+
   def test_dump_escapes_method_name
     method_name = "foo\"bar"
     klass = Class.new do