string.c: Add fastpath to single_byte_optimizable

`rb_enc_from_index` is a costly operation, so it is worth avoiding
calling it for the common encodings.

Also, in the case of UTF-8, it's more efficient to scan the
coderange when it is unknown than to fall back to the slower
algorithms.
This commit is contained in:
Jean Boussier 2024-08-09 15:24:49 +02:00
parent 5a570421a5
commit a332367dad
Notes: git 2024-08-09 20:07:03 +00:00

View File

@ -594,22 +594,30 @@ fstring_cmp(VALUE a, VALUE b)
memcmp(aptr, bptr, alen) != 0); memcmp(aptr, bptr, alen) != 0);
} }
static inline int static inline bool
single_byte_optimizable(VALUE str) single_byte_optimizable(VALUE str)
{ {
rb_encoding *enc; int encindex = ENCODING_GET(str);
switch (encindex) {
case ENCINDEX_ASCII_8BIT:
case ENCINDEX_US_ASCII:
return true;
case ENCINDEX_UTF_8:
// For UTF-8 it's worth scanning the string coderange when unknown.
return rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT;
}
/* Conservative. It may be ENC_CODERANGE_UNKNOWN. */ /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
return 1; return true;
}
enc = STR_ENC_GET(str); if (rb_enc_mbmaxlen(rb_enc_from_index(encindex)) == 1) {
if (rb_enc_mbmaxlen(enc) == 1) return true;
return 1; }
/* Conservative. Possibly single byte. /* Conservative. Possibly single byte.
* "\xa1" in Shift_JIS for example. */ * "\xa1" in Shift_JIS for example. */
return 0; return false;
} }
VALUE rb_fs; VALUE rb_fs;