rb_str_buf_append: add a fast path for ENC_CODERANGE_VALID
If the RHS has a valid coderange (ENC_CODERANGE_VALID) and both strings have the same encoding, we can use the fast path. However, we still need to update the LHS coderange.

```
compare-ruby: ruby 3.2.0dev (2022-07-21T14:46:32Z master cdbb9b8555) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-25T07:25:41Z string-concat-vali.. 11a2772bdd) [arm64-darwin21]
warming up...

|                    |compare-ruby|built-ruby|
|:-------------------|-----------:|---------:|
|binary_concat_7bit  |    554.816k|  556.460k|
|                    |           -|     1.00x|
|utf8_concat_7bit    |    556.367k|  555.101k|
|                    |       1.00x|         -|
|utf8_concat_UTF8    |    412.555k|  556.824k|
|                    |           -|     1.35x|
```
This commit is contained in:
parent
f61dd38e5c
commit
31a5586d1e
Notes:
git
2022-07-25 21:19:16 +09:00
@ -1,9 +1,9 @@
|
|||||||
prelude: |
|
prelude: |
|
||||||
CHUNK = "a" * 64
|
CHUNK = "a" * 64
|
||||||
BCHUNK = "a".b * 64
|
UCHUNK = "é" * 32
|
||||||
GC.disable # GC causes a lot of variance
|
GC.disable # GC causes a lot of variance
|
||||||
benchmark:
|
benchmark:
|
||||||
binary_concat_utf8: |
|
binary_concat_7bit: |
|
||||||
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
|
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
@ -13,17 +13,7 @@ benchmark:
|
|||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
binary_concat_binary: |
|
utf8_concat_7bit: |
|
||||||
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
|
|
||||||
utf8_concat_utf8: |
|
|
||||||
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
|
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
@ -33,3 +23,13 @@ benchmark:
|
|||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
|
||||||
|
utf8_concat_UTF8: |
|
||||||
|
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
|
||||||
|
17
string.c
17
string.c
@ -3329,9 +3329,24 @@ VALUE
|
|||||||
rb_str_buf_append(VALUE str, VALUE str2)
|
rb_str_buf_append(VALUE str, VALUE str2)
|
||||||
{
|
{
|
||||||
int str2_cr = rb_enc_str_coderange(str2);
|
int str2_cr = rb_enc_str_coderange(str2);
|
||||||
if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
|
|
||||||
|
if (str_enc_fastpath(str)) {
|
||||||
|
switch (str2_cr) {
|
||||||
|
case ENC_CODERANGE_7BIT:
|
||||||
|
// If RHS is 7bit we can do simple concatenation
|
||||||
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
|
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
|
||||||
return str;
|
return str;
|
||||||
|
case ENC_CODERANGE_VALID:
|
||||||
|
// If RHS is valid, we can do simple concatenation if encodings are the same
|
||||||
|
if (ENCODING_GET_INLINED(str) == ENCODING_GET_INLINED(str2)) {
|
||||||
|
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
|
||||||
|
int str_cr = ENC_CODERANGE(str);
|
||||||
|
if (UNLIKELY(str_cr != ENC_CODERANGE_VALID)) {
|
||||||
|
ENC_CODERANGE_SET(str, RB_ENC_CODERANGE_AND(str_cr, str2_cr));
|
||||||
|
}
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
|
rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user