diff --git a/benchmark/string_concat.yml b/benchmark/string_concat.yml index da403e7a53..656bcd1cd7 100644 --- a/benchmark/string_concat.yml +++ b/benchmark/string_concat.yml @@ -1,8 +1,29 @@ prelude: | CHUNK = "a" * 64 + BCHUNK = "a".b * 64 benchmark: - string_concat: | - buffer = String.new(capacity: 4096) + binary_concat_utf8: | + buffer = String.new(capacity: 4096, encoding: Encoding::BINARY) + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + binary_concat_binary: | + buffer = String.new(capacity: 4096, encoding: Encoding::BINARY) + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK + utf8_concat_utf8: | + buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8) buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK diff --git a/string.c b/string.c index 4cca9b1eac..d6c79c7988 100644 --- a/string.c +++ b/string.c @@ -3303,12 +3303,28 @@ rb_str_buf_cat_ascii(VALUE str, const char *ptr) } } +static inline bool +str_enc_fastpath(VALUE str) +{ + // The overwhelming majority of strings are in one of these 3 encodings. + switch (ENCODING_GET_INLINED(str)) { + case ENCINDEX_ASCII_8BIT: + case ENCINDEX_UTF_8: + case ENCINDEX_US_ASCII: + return true; + default: + return false; + } +} + VALUE rb_str_buf_append(VALUE str, VALUE str2) { - int str2_cr; - - str2_cr = ENC_CODERANGE(str2); + int str2_cr = rb_enc_str_coderange(str2); + if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) { + str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2)); + return str; + } rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2), ENCODING_GET(str2), str2_cr, &str2_cr);